From 9baef8e18a1054eaca86c834b2cf3a729ae7d9ab Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Tue, 11 Feb 2025 11:38:58 +0000 Subject: [PATCH 01/53] Dropdown menu for llm templates. --- ankihub/entry_point.py | 5 ++ ankihub/labs/llm/__init__.py | 1 + ankihub/labs/llm/editor_dropdown.js | 70 +++++++++++++++++++++++ ankihub/labs/llm/templates.py | 89 +++++++++++++++++++++++++++++ ankihub/settings.py | 1 + 5 files changed, 166 insertions(+) create mode 100644 ankihub/labs/llm/__init__.py create mode 100644 ankihub/labs/llm/editor_dropdown.js create mode 100644 ankihub/labs/llm/templates.py diff --git a/ankihub/entry_point.py b/ankihub/entry_point.py index 331dea19d..c9e946512 100644 --- a/ankihub/entry_point.py +++ b/ankihub/entry_point.py @@ -29,6 +29,7 @@ from .gui.media_sync import media_sync from .gui.menu import menu_state, refresh_ankihub_menu, setup_ankihub_menu from .gui.operations.ankihub_sync import setup_full_sync_patch +from .labs.llm import templates as llm_templates from .main.note_deletion import handle_notes_deleted_from_webapp from .main.utils import modify_note_type_templates from .settings import ( @@ -238,6 +239,10 @@ def _general_setup(): "Set up feature flag fetching (flags will be fetched in the background)." ) + if config.labs_enabled: + llm_templates.setup() + LOGGER.info("Set up LLM templates.") + def _copy_web_media_to_media_folder(): """Copy media files from the web folder to the media folder. 
Existing files with the same name diff --git a/ankihub/labs/llm/__init__.py b/ankihub/labs/llm/__init__.py new file mode 100644 index 000000000..761bc6211 --- /dev/null +++ b/ankihub/labs/llm/__init__.py @@ -0,0 +1 @@ +"""LLM-related functionality for AnkiHub.""" diff --git a/ankihub/labs/llm/editor_dropdown.js b/ankihub/labs/llm/editor_dropdown.js new file mode 100644 index 000000000..c90d37ae7 --- /dev/null +++ b/ankihub/labs/llm/editor_dropdown.js @@ -0,0 +1,70 @@ +(() => { + const button = document.getElementById('{{ button_id }}'); + const rect = button.getBoundingClientRect(); + + // Remove existing dropdown if any + const existingDropdown = document.getElementById('template-dropdown'); + if (existingDropdown) { + existingDropdown.remove(); + return; + } + + // Create dropdown + const dropdown = document.createElement('div'); + dropdown.id = 'template-dropdown'; + dropdown.style.position = 'absolute'; + dropdown.style.left = rect.left + 'px'; + dropdown.style.top = (rect.bottom + 2) + 'px'; + dropdown.style.backgroundColor = 'white'; + dropdown.style.border = '1px solid #ccc'; + dropdown.style.borderRadius = '3px'; + dropdown.style.boxShadow = '0 2px 4px rgba(0,0,0,0.2)'; + dropdown.style.zIndex = '1000'; + dropdown.style.maxHeight = '300px'; + dropdown.style.overflowY = 'auto'; + + // Parse the options JSON string + const templateOptions = JSON.parse('{{ options }}'); + + if (templateOptions.length === 0) { + const item = document.createElement('div'); + item.textContent = 'No templates found'; + item.style.padding = '8px 12px'; + item.style.color = '#666'; + item.style.fontStyle = 'italic'; + dropdown.appendChild(item); + } else { + templateOptions.forEach(opt => { + const item = document.createElement('div'); + item.textContent = opt; + item.style.padding = '8px 12px'; + item.style.cursor = 'pointer'; + item.style.whiteSpace = 'nowrap'; + + item.addEventListener('mouseover', () => { + item.style.backgroundColor = '#f0f0f0'; + }); + + 
item.addEventListener('mouseout', () => { + item.style.backgroundColor = 'white'; + }); + + item.addEventListener('click', () => { + pycmd(`template-select:${opt}`); + dropdown.remove(); + }); + + dropdown.appendChild(item); + }); + } + + // Close dropdown when clicking outside + document.addEventListener('click', function closeDropdown(e) { + if (!dropdown.contains(e.target) && e.target !== button) { + dropdown.remove(); + document.removeEventListener('click', closeDropdown); + } + }); + + document.body.appendChild(dropdown); +})(); diff --git a/ankihub/labs/llm/templates.py b/ankihub/labs/llm/templates.py new file mode 100644 index 000000000..d51b2a626 --- /dev/null +++ b/ankihub/labs/llm/templates.py @@ -0,0 +1,89 @@ +"""Module for handling LLM template functionality in the editor.""" + +import json +import subprocess +from pathlib import Path +from typing import Any, List + +from aqt import gui_hooks +from aqt.editor import Editor +from aqt.utils import tooltip +from jinja2 import Template + +TEMPLATE_BTN_ID = "ankihub-btn-llm-templates" + + +def setup() -> None: + """Set up the LLM templates functionality.""" + gui_hooks.editor_did_init_buttons.append(_setup_editor_button) + gui_hooks.webview_did_receive_js_message.append(_handle_js_message) + + +def _setup_editor_button(buttons: List[str], editor: Editor) -> None: + """Add the LLM templates button to the editor.""" + template_button = editor.addButton( + icon=None, + cmd=TEMPLATE_BTN_ID, + func=_on_template_button_press, + label="Templates ▾", + id=TEMPLATE_BTN_ID, + disables=False, + ) + buttons.append(template_button) + + # Add button styling + buttons.append( + "" + ) + + +def _get_template_files() -> List[str]: + """Get list of template files from the LLM templates directory.""" + try: + # Get templates directory path from llm command + result = subprocess.run( + ["llm", "templates", "path"], capture_output=True, text=True, check=True + ) + templates_path = Path(result.stdout.strip()) + + # Get all yaml 
files from the directory + yaml_files = [] + if templates_path.exists(): + yaml_files = [f.name for f in templates_path.glob("*.yaml") if f.is_file()] + yaml_files.sort() + + return yaml_files + except (subprocess.CalledProcessError, FileNotFoundError): + return ["No templates found"] + + +def _on_template_button_press(editor: Editor) -> None: + """Handle the template button click by showing a dropdown menu.""" + options = _get_template_files() + + # Read and render the JavaScript template + js_template_path = Path(__file__).parent / "editor_dropdown.js" + with open(js_template_path, "r") as f: + template = Template(f.read()) + + script = template.render(button_id=TEMPLATE_BTN_ID, options=json.dumps(options)) + editor.web.eval(script) + + +def _handle_template_selection(editor: Editor, template_name: str) -> None: + """Handle when a template is selected from the dropdown.""" + tooltip(f"Selected template: {template_name}") + + +def _handle_js_message( + handled: tuple[bool, Any], message: str, context: Any +) -> tuple[bool, Any]: + """Handle JavaScript messages for template selection.""" + if message.startswith("template-select:"): + template_name = message.split(":", 1)[1] + _handle_template_selection(context, template_name) + return (True, None) + return handled diff --git a/ankihub/settings.py b/ankihub/settings.py index 8a3b29e81..b1d0a1852 100644 --- a/ankihub/settings.py +++ b/ankihub/settings.py @@ -193,6 +193,7 @@ def __init__(self): self.app_url: Optional[str] = None self.s3_bucket_url: Optional[str] = None self.anking_deck_id: Optional[uuid.UUID] = None + self.labs_enabled: bool = True if os.getenv("LABS_ENABLED") == "True" else False def setup_public_config_and_other_settings(self): migrate_public_config() From 906e157e74619af1e83a6726b83862a880c58aed Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Tue, 11 Feb 2025 11:51:41 +0000 Subject: [PATCH 02/53] Rename stuff. 
--- ankihub/entry_point.py | 6 +- ankihub/labs/llm/llm.py | 96 +++++++++++++++++++ ...{editor_dropdown.js => prompt_selector.js} | 16 ++-- 3 files changed, 107 insertions(+), 11 deletions(-) create mode 100644 ankihub/labs/llm/llm.py rename ankihub/labs/llm/{editor_dropdown.js => prompt_selector.js} (81%) diff --git a/ankihub/entry_point.py b/ankihub/entry_point.py index c9e946512..b540d8b43 100644 --- a/ankihub/entry_point.py +++ b/ankihub/entry_point.py @@ -29,7 +29,7 @@ from .gui.media_sync import media_sync from .gui.menu import menu_state, refresh_ankihub_menu, setup_ankihub_menu from .gui.operations.ankihub_sync import setup_full_sync_patch -from .labs.llm import templates as llm_templates +from .labs.llm import llm from .main.note_deletion import handle_notes_deleted_from_webapp from .main.utils import modify_note_type_templates from .settings import ( @@ -240,8 +240,8 @@ def _general_setup(): ) if config.labs_enabled: - llm_templates.setup() - LOGGER.info("Set up LLM templates.") + llm.setup() + LOGGER.info("Set up LLM prompt functionality.") def _copy_web_media_to_media_folder(): diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py new file mode 100644 index 000000000..3b3c0f23a --- /dev/null +++ b/ankihub/labs/llm/llm.py @@ -0,0 +1,96 @@ +"""Module for handling LLM prompt functionality in the editor.""" + +import json +import subprocess +from pathlib import Path +from typing import Any, List + +from aqt import gui_hooks +from aqt.editor import Editor +from aqt.utils import tooltip +from jinja2 import Template + +PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" + + +def setup() -> None: + """Set up the LLM prompt functionality.""" + gui_hooks.editor_did_init_buttons.append(_setup_prompt_selector_button) + gui_hooks.webview_did_receive_js_message.append(_handle_js_message) + + +def _setup_prompt_selector_button(buttons: List[str], editor: Editor) -> None: + """Add the LLM prompt selector button to the editor.""" + prompt_button = 
editor.addButton( + icon=None, + cmd=PROMPT_SELECTOR_BTN_ID, + func=_on_prompt_button_press, + label="Prompts ▾", + id=PROMPT_SELECTOR_BTN_ID, + disables=False, + ) + buttons.append(prompt_button) + + # Add button styling + buttons.append( + "" + ) + + +def _get_prompt_templates() -> List[str]: + """Get list of prompt template files from the LLM templates directory.""" + try: + # Get templates directory path from llm command + result = subprocess.run( + ["llm", "templates", "path"], capture_output=True, text=True, check=True + ) + templates_path = Path(result.stdout.strip()) + + # Get all yaml files from the directory + yaml_files = [] + if templates_path.exists(): + yaml_files = [f.name for f in templates_path.glob("*.yaml") if f.is_file()] + yaml_files.sort() + + return yaml_files + except (subprocess.CalledProcessError, FileNotFoundError): + return ["No prompt templates found"] + + +def _on_prompt_button_press(editor: Editor) -> None: + """Handle the prompt selector button click by showing a dropdown menu.""" + prompts = _get_prompt_templates() + + # Read and render the JavaScript template + js_template_path = Path(__file__).parent / "prompt_selector.js" + with open(js_template_path, "r") as f: + template = Template(f.read()) + + script = template.render( + button_id=PROMPT_SELECTOR_BTN_ID, options=json.dumps(prompts) + ) + editor.web.eval(script) + + +def _execute_prompt_template(editor: Editor, template_name: str) -> None: + """Execute the selected prompt template with the current note as input.""" + # TODO: Implement prompt execution logic + # 1. Load the selected template + # 2. Extract note content + # 3. Run the prompt through the LLM + # 4. 
Handle the response + tooltip(f"Will execute prompt template: {template_name}") + + +def _handle_js_message( + handled: tuple[bool, Any], message: str, context: Any +) -> tuple[bool, Any]: + """Handle JavaScript messages for prompt template selection.""" + if message.startswith("prompt-select:"): + template_name = message.split(":", 1)[1] + _execute_prompt_template(context, template_name) + return (True, None) + return handled diff --git a/ankihub/labs/llm/editor_dropdown.js b/ankihub/labs/llm/prompt_selector.js similarity index 81% rename from ankihub/labs/llm/editor_dropdown.js rename to ankihub/labs/llm/prompt_selector.js index c90d37ae7..19e8be19b 100644 --- a/ankihub/labs/llm/editor_dropdown.js +++ b/ankihub/labs/llm/prompt_selector.js @@ -3,7 +3,7 @@ const rect = button.getBoundingClientRect(); // Remove existing dropdown if any - const existingDropdown = document.getElementById('template-dropdown'); + const existingDropdown = document.getElementById('prompt-dropdown'); if (existingDropdown) { existingDropdown.remove(); return; @@ -11,7 +11,7 @@ // Create dropdown const dropdown = document.createElement('div'); - dropdown.id = 'template-dropdown'; + dropdown.id = 'prompt-dropdown'; dropdown.style.position = 'absolute'; dropdown.style.left = rect.left + 'px'; dropdown.style.top = (rect.bottom + 2) + 'px'; @@ -24,19 +24,19 @@ dropdown.style.overflowY = 'auto'; // Parse the options JSON string - const templateOptions = JSON.parse('{{ options }}'); + const promptTemplates = JSON.parse('{{ options }}'); - if (templateOptions.length === 0) { + if (promptTemplates.length === 0 || promptTemplates[0] === 'No prompt templates found') { const item = document.createElement('div'); - item.textContent = 'No templates found'; + item.textContent = 'No prompt templates found'; item.style.padding = '8px 12px'; item.style.color = '#666'; item.style.fontStyle = 'italic'; dropdown.appendChild(item); } else { - templateOptions.forEach(opt => { + promptTemplates.forEach(template => 
{ const item = document.createElement('div'); - item.textContent = opt; + item.textContent = template; item.style.padding = '8px 12px'; item.style.cursor = 'pointer'; item.style.whiteSpace = 'nowrap'; @@ -50,7 +50,7 @@ }); item.addEventListener('click', () => { - pycmd(`template-select:${opt}`); + pycmd(`prompt-select:${template}`); dropdown.remove(); }); From 16c906a98b7fe4304019db4f848f5b77ab95b597 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Tue, 11 Feb 2025 13:20:33 +0000 Subject: [PATCH 03/53] Fix dq window display issue and run subprocess properly. --- ankihub/labs/llm/llm.py | 74 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 6 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 3b3c0f23a..b112b9a8a 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -5,8 +5,10 @@ from pathlib import Path from typing import Any, List +import aqt from aqt import gui_hooks from aqt.editor import Editor +from aqt.qt import QDialog, QPushButton, QTextEdit, QVBoxLayout from aqt.utils import tooltip from jinja2 import Template @@ -75,14 +77,74 @@ def _on_prompt_button_press(editor: Editor) -> None: editor.web.eval(script) +def _get_note_content(editor: Editor) -> str: + """Extract content from the current note's fields.""" + note = editor.note + if not note: + return "" + + fields_dict = {name: note[name] for name in note.keys()} + return json.dumps(fields_dict) + + +def _show_llm_response(response: str) -> None: + """Display the LLM response in a dialog.""" + dialog = QDialog(aqt.mw) + dialog.setWindowTitle("LLM Response") + dialog.setMinimumWidth(600) + dialog.setMinimumHeight(400) + + layout = QVBoxLayout() + + # Create text display area + text_edit = QTextEdit() + text_edit.setPlainText(response) + text_edit.setReadOnly(True) + layout.addWidget(text_edit) + + # Add close button + close_button = QPushButton("Close") + close_button.clicked.connect(dialog.accept) + layout.addWidget(close_button) + 
+ dialog.setLayout(layout) + dialog.exec() + + def _execute_prompt_template(editor: Editor, template_name: str) -> None: """Execute the selected prompt template with the current note as input.""" - # TODO: Implement prompt execution logic - # 1. Load the selected template - # 2. Extract note content - # 3. Run the prompt through the LLM - # 4. Handle the response - tooltip(f"Will execute prompt template: {template_name}") + note_content = _get_note_content(editor) + if not note_content: + tooltip("No note content available") + return + + try: + # Run the LLM command with the template and note content + # Use shlex.quote to properly escape the note content for shell command + import shlex + + escaped_content = shlex.quote(note_content) + result = subprocess.run( + [ + "llm", + "--no-stream", + "-t", + template_name.replace(".yaml", ""), + escaped_content, + ], + capture_output=True, + text=True, + check=True, + ) + + # Show the response in a dialog + _show_llm_response(result.stdout) + + except subprocess.CalledProcessError as e: + error_msg = f"Error running LLM command: {e.stderr}" + tooltip(error_msg) + except Exception as e: + tooltip(f"Unexpected error: {str(e)}") def _handle_js_message( From 277562a17fe2de18c28dd57b4d1fa7968949a4b7 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 08:55:29 +0000 Subject: [PATCH 04/53] Update note with LLM response. 
--- ankihub/labs/llm/llm.py | 69 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index b112b9a8a..017d2f6f3 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -3,13 +3,13 @@ import json import subprocess from pathlib import Path -from typing import Any, List +from typing import Any, Dict, List import aqt from aqt import gui_hooks from aqt.editor import Editor -from aqt.qt import QDialog, QPushButton, QTextEdit, QVBoxLayout -from aqt.utils import tooltip +from aqt.qt import QDialog, QHBoxLayout, QPushButton, QTextEdit, QVBoxLayout +from aqt.utils import showWarning, tooltip from jinja2 import Template PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" @@ -87,8 +87,24 @@ def _get_note_content(editor: Editor) -> str: return json.dumps(fields_dict) -def _show_llm_response(response: str) -> None: - """Display the LLM response in a dialog.""" +def _update_note_fields(editor: Editor, new_fields: Dict[str, str]) -> None: + """Update the note fields with new content.""" + note = editor.note + if not note: + return + + # Only update fields that exist in the note + for field_name, new_content in new_fields.items(): + if field_name in note: + note[field_name] = new_content + + # Save changes and update the editor + note.flush() + editor.loadNote() + + +def _show_llm_response(editor: Editor, response: str) -> None: + """Display the LLM response in a dialog with option to update note.""" dialog = QDialog(aqt.mw) dialog.setWindowTitle("LLM Response") dialog.setMinimumWidth(600) @@ -102,15 +118,44 @@ def _show_llm_response(response: str) -> None: text_edit.setReadOnly(True) layout.addWidget(text_edit) + # Create button row + button_layout = QHBoxLayout() + + # Add update button + update_button = QPushButton("Update Note") + update_button.clicked.connect(lambda: _handle_update_note(editor, response, dialog)) + button_layout.addWidget(update_button) + # Add 
close button close_button = QPushButton("Close") close_button.clicked.connect(dialog.accept) - layout.addWidget(close_button) + button_layout.addWidget(close_button) + layout.addLayout(button_layout) dialog.setLayout(layout) dialog.exec() +def _handle_update_note(editor: Editor, response: str, dialog: QDialog) -> None: + """Handle the update note button click.""" + try: + # Parse the JSON response + new_fields = json.loads(response) + if not isinstance(new_fields, dict): + showWarning("Invalid response format. Expected a JSON object.") + return + + # Update the note + _update_note_fields(editor, new_fields) + tooltip("Note updated successfully") + dialog.accept() + + except json.JSONDecodeError: + showWarning("Invalid JSON response from LLM") + except Exception as e: + showWarning(f"Error updating note: {str(e)}") + + def _execute_prompt_template(editor: Editor, template_name: str) -> None: """Execute the selected prompt template with the current note as input.""" note_content = _get_note_content(editor) @@ -124,13 +169,21 @@ def _execute_prompt_template(editor: Editor, template_name: str) -> None: import shlex escaped_content = shlex.quote(note_content) + # TODO Exclude ankihub_id field + note_schema = json.dumps([{field: "string" for field in editor.note.keys()}]) result = subprocess.run( [ "llm", "--no-stream", "-t", template_name.replace(".yaml", ""), + "-p", + "note_schema", + shlex.quote(note_schema), escaped_content, + "-o", + "json_object", + "1", ], capture_output=True, text=True, @@ -138,13 +191,15 @@ def _execute_prompt_template(editor: Editor, template_name: str) -> None: ) # Show the response in a dialog - _show_llm_response(result.stdout) + _show_llm_response(editor, result.stdout) except subprocess.CalledProcessError as e: error_msg = f"Error running LLM command: {e.stderr}" tooltip(error_msg) + raise Exception(error_msg) except Exception as e: tooltip(f"Unexpected error: {str(e)}") + raise Exception(f"Unexpected error: {str(e)}") def 
_handle_js_message( From bf6a6cfa2cb371d2378a8c8ba9d3c97fb60a8cb0 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 11:40:29 +0000 Subject: [PATCH 05/53] Check and install uv. --- ankihub/labs/llm/llm.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 017d2f6f3..d08394905 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -1,6 +1,7 @@ """Module for handling LLM prompt functionality in the editor.""" import json +import platform import subprocess from pathlib import Path from typing import Any, Dict, List @@ -15,8 +16,32 @@ PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" +def _check_and_install_uv() -> None: + """Check if uv is installed and install it if not.""" + try: + subprocess.run(["uv", "version"], capture_output=True, check=True) + except (subprocess.CalledProcessError, FileNotFoundError): + try: + if platform.system() == "Darwin": # macOS + subprocess.run( + "curl -LsSf https://astral.sh/uv/install.sh | sh", + shell=True, + check=True, + ) + elif platform.system() == "Windows": + subprocess.run( + 'powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"', + shell=True, + check=True, + ) + tooltip("Successfully installed uv") + except subprocess.CalledProcessError as e: + showWarning(f"Failed to install uv: {str(e)}") + + def setup() -> None: """Set up the LLM prompt functionality.""" + _check_and_install_uv() gui_hooks.editor_did_init_buttons.append(_setup_prompt_selector_button) gui_hooks.webview_did_receive_js_message.append(_handle_js_message) From 4f87112e100526c2361af3c8a347567751847a63 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 11:42:50 +0000 Subject: [PATCH 06/53] Install llm with uv. 
--- ankihub/labs/llm/llm.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index d08394905..9b4e9c138 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -20,6 +20,7 @@ def _check_and_install_uv() -> None: """Check if uv is installed and install it if not.""" try: subprocess.run(["uv", "version"], capture_output=True, check=True) + print("uv is installed") except (subprocess.CalledProcessError, FileNotFoundError): try: if platform.system() == "Darwin": # macOS @@ -39,9 +40,27 @@ def _check_and_install_uv() -> None: showWarning(f"Failed to install uv: {str(e)}") +def _install_llm() -> None: + """Install llm using uv if not already installed.""" + try: + subprocess.run(["llm", "--version"], capture_output=True, check=True) + print("llm is already installed") + except (subprocess.CalledProcessError, FileNotFoundError): + try: + subprocess.run( + ["uv", "install", "llm"], + check=True, + capture_output=True, + ) + tooltip("Successfully installed llm") + except subprocess.CalledProcessError as e: + showWarning(f"Failed to install llm: {str(e)}") + + def setup() -> None: """Set up the LLM prompt functionality.""" _check_and_install_uv() + _install_llm() gui_hooks.editor_did_init_buttons.append(_setup_prompt_selector_button) gui_hooks.webview_did_receive_js_message.append(_handle_js_message) From c0a4bc2e02e83312740b1b09f9e7c06024219054 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 11:56:20 +0000 Subject: [PATCH 07/53] Fix uv run issue. 
--- ankihub/labs/llm/llm.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 9b4e9c138..2cfbc0b9b 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -91,7 +91,10 @@ def _get_prompt_templates() -> List[str]: try: # Get templates directory path from llm command result = subprocess.run( - ["llm", "templates", "path"], capture_output=True, text=True, check=True + ["uv", "run", "--no-project", "llm", "templates", "path"], + capture_output=True, + text=True, + check=True, ) templates_path = Path(result.stdout.strip()) @@ -217,7 +220,12 @@ def _execute_prompt_template(editor: Editor, template_name: str) -> None: note_schema = json.dumps([{field: "string" for field in editor.note.keys()}]) result = subprocess.run( [ + "uv", + "run", + "--no-project", "llm", + "-m", + "gpt-4o", "--no-stream", "-t", template_name.replace(".yaml", ""), From f21819e95e5090569efd47a2efadafc0289949e0 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 12:02:55 +0000 Subject: [PATCH 08/53] Install provides. 
--- ankihub/labs/llm/llm.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 2cfbc0b9b..22040c82a 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -41,18 +41,33 @@ def _check_and_install_uv() -> None: def _install_llm() -> None: - """Install llm using uv if not already installed.""" + """Install llm and additional providers using uv if not already installed.""" try: subprocess.run(["llm", "--version"], capture_output=True, check=True) print("llm is already installed") except (subprocess.CalledProcessError, FileNotFoundError): try: + # Install base llm package subprocess.run( ["uv", "install", "llm"], check=True, capture_output=True, ) tooltip("Successfully installed llm") + + # Install additional providers + providers = ["llm-gemini", "llm-perplexity", "llm-claude-3"] + for provider in providers: + try: + subprocess.run( + ["uv", "run", "--no-project", "llm", "install", "-U", provider], + check=True, + capture_output=True, + ) + print(f"Successfully installed {provider}") + except subprocess.CalledProcessError as e: + showWarning(f"Failed to install {provider}: {str(e)}") + except subprocess.CalledProcessError as e: showWarning(f"Failed to install llm: {str(e)}") @@ -224,6 +239,9 @@ def _execute_prompt_template(editor: Editor, template_name: str) -> None: "run", "--no-project", "llm", + # TODO Allow users to choose model + # TODO Allow users to continue a conversation + # TODO Allow users to add an attachment "-m", "gpt-4o", "--no-stream", From 82e24813f549d1b7b6ddf531ae97d452be0ac184 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 13:02:25 +0000 Subject: [PATCH 09/53] show diff. 
--- ankihub/labs/llm/llm.py | 61 +++++++++++++++++++++++++++++++--- scripts/setup_addon_symlink.py | 10 ++++++ 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 22040c82a..f11f36708 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -1,5 +1,6 @@ """Module for handling LLM prompt functionality in the editor.""" +import difflib import json import platform import subprocess @@ -42,6 +43,7 @@ def _check_and_install_uv() -> None: def _install_llm() -> None: """Install llm and additional providers using uv if not already installed.""" + # TODO Prompt users to set up their API keys. try: subprocess.run(["llm", "--version"], capture_output=True, check=True) print("llm is already installed") @@ -165,19 +167,68 @@ def _update_note_fields(editor: Editor, new_fields: Dict[str, str]) -> None: editor.loadNote() +def _create_diff_html(original: str, suggested: str) -> str: + """Create HTML diff between original and suggested text.""" + differ = difflib.Differ() + diff = list(differ.compare(original.splitlines(True), suggested.splitlines(True))) + + html = [] + for line in diff: + if line.startswith("+"): + html.append(f'{line[2:]}') + elif line.startswith("-"): + html.append( + f'{line[2:]}' + ) + elif line.startswith("?"): + continue + else: + html.append(line[2:]) + + return "".join(html) + + def _show_llm_response(editor: Editor, response: str) -> None: """Display the LLM response in a dialog with option to update note.""" + try: + suggested_fields = json.loads(response) + if not isinstance(suggested_fields, dict): + showWarning("Invalid response format. 
Expected a JSON object.") + return + except json.JSONDecodeError: + showWarning("Invalid JSON response from LLM") + return + dialog = QDialog(aqt.mw) - dialog.setWindowTitle("LLM Response") - dialog.setMinimumWidth(600) - dialog.setMinimumHeight(400) + dialog.setWindowTitle("LLM Response - Field Changes") + dialog.setMinimumWidth(800) + dialog.setMinimumHeight(600) layout = QVBoxLayout() - # Create text display area + # Create text display area with HTML formatting text_edit = QTextEdit() - text_edit.setPlainText(response) text_edit.setReadOnly(True) + + # Build HTML content showing diffs for each field + html_content = [ + "", + ] + + note = editor.note + for field_name, suggested_content in suggested_fields.items(): + if field_name in note: + original_content = note[field_name] + html_content.append(f'
{field_name}:
') + html_content.append('
') + html_content.append(_create_diff_html(original_content, suggested_content)) + html_content.append("
") + + text_edit.setHtml("".join(html_content)) layout.addWidget(text_edit) # Create button row diff --git a/scripts/setup_addon_symlink.py b/scripts/setup_addon_symlink.py index 8b6a1b42f..001f7941a 100644 --- a/scripts/setup_addon_symlink.py +++ b/scripts/setup_addon_symlink.py @@ -14,6 +14,16 @@ def setup_addon_symlink(anki_base_path: Path) -> None: print(f"Created addons21 directory: {addons21_dir} (if it didn't exist already)") addon_dst = addons21_dir / "ankihub" + ankiwebview_inspector = Path( + "/Users/andrewsanchez/Projects/anki21-addon-ankiwebview-inspector/src" + ) + inspector_dst = addons21_dir / "inspector" + try: + os.remove(inspector_dst) + print(f"Removed existing symlink: {inspector_dst}") + except FileNotFoundError: + pass + inspector_dst.symlink_to(ankiwebview_inspector) if addon_dst.is_symlink(): os.remove(addon_dst) From c0bdc0cc794e3237646c0b214e980b33273661f3 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 13:48:33 +0000 Subject: [PATCH 10/53] Update ankihub/labs/llm/llm.py Co-authored-by: Jakub Fidler <31575114+RisingOrange@users.noreply.github.com> --- ankihub/labs/llm/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index f11f36708..8175519fc 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -51,7 +51,7 @@ def _install_llm() -> None: try: # Install base llm package subprocess.run( - ["uv", "install", "llm"], + ["uv", "tool", "install", "llm"], check=True, capture_output=True, ) From 4a284db3bd14f769de0cfe607380145cad678224 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 13:11:42 +0000 Subject: [PATCH 11/53] Show only anki templates. 
--- ankihub/labs/llm/llm.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 8175519fc..c40330deb 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -115,13 +115,17 @@ def _get_prompt_templates() -> List[str]: ) templates_path = Path(result.stdout.strip()) - # Get all yaml files from the directory + # Get yaml files that start with "Anki" (case insensitive) yaml_files = [] if templates_path.exists(): - yaml_files = [f.name for f in templates_path.glob("*.yaml") if f.is_file()] + yaml_files = [ + f.stem # stem gives filename without extension + for f in templates_path.glob("*.yaml") + if f.is_file() and f.stem.lower().startswith("anki") + ] yaml_files.sort() - return yaml_files + return yaml_files or ["No Anki templates found"] except (subprocess.CalledProcessError, FileNotFoundError): return ["No prompt templates found"] From 36ff3080eb94bc962d7c91c8b98230f36e9f3929 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 15:02:16 +0000 Subject: [PATCH 12/53] Delete templates.py. 
--- ankihub/labs/llm/templates.py | 89 ----------------------------------- 1 file changed, 89 deletions(-) delete mode 100644 ankihub/labs/llm/templates.py diff --git a/ankihub/labs/llm/templates.py b/ankihub/labs/llm/templates.py deleted file mode 100644 index d51b2a626..000000000 --- a/ankihub/labs/llm/templates.py +++ /dev/null @@ -1,89 +0,0 @@ -"""Module for handling LLM template functionality in the editor.""" - -import json -import subprocess -from pathlib import Path -from typing import Any, List - -from aqt import gui_hooks -from aqt.editor import Editor -from aqt.utils import tooltip -from jinja2 import Template - -TEMPLATE_BTN_ID = "ankihub-btn-llm-templates" - - -def setup() -> None: - """Set up the LLM templates functionality.""" - gui_hooks.editor_did_init_buttons.append(_setup_editor_button) - gui_hooks.webview_did_receive_js_message.append(_handle_js_message) - - -def _setup_editor_button(buttons: List[str], editor: Editor) -> None: - """Add the LLM templates button to the editor.""" - template_button = editor.addButton( - icon=None, - cmd=TEMPLATE_BTN_ID, - func=_on_template_button_press, - label="Templates ▾", - id=TEMPLATE_BTN_ID, - disables=False, - ) - buttons.append(template_button) - - # Add button styling - buttons.append( - "" - ) - - -def _get_template_files() -> List[str]: - """Get list of template files from the LLM templates directory.""" - try: - # Get templates directory path from llm command - result = subprocess.run( - ["llm", "templates", "path"], capture_output=True, text=True, check=True - ) - templates_path = Path(result.stdout.strip()) - - # Get all yaml files from the directory - yaml_files = [] - if templates_path.exists(): - yaml_files = [f.name for f in templates_path.glob("*.yaml") if f.is_file()] - yaml_files.sort() - - return yaml_files - except (subprocess.CalledProcessError, FileNotFoundError): - return ["No templates found"] - - -def _on_template_button_press(editor: Editor) -> None: - """Handle the template button 
click by showing a dropdown menu.""" - options = _get_template_files() - - # Read and render the JavaScript template - js_template_path = Path(__file__).parent / "editor_dropdown.js" - with open(js_template_path, "r") as f: - template = Template(f.read()) - - script = template.render(button_id=TEMPLATE_BTN_ID, options=json.dumps(options)) - editor.web.eval(script) - - -def _handle_template_selection(editor: Editor, template_name: str) -> None: - """Handle when a template is selected from the dropdown.""" - tooltip(f"Selected template: {template_name}") - - -def _handle_js_message( - handled: tuple[bool, Any], message: str, context: Any -) -> tuple[bool, Any]: - """Handle JavaScript messages for template selection.""" - if message.startswith("template-select:"): - template_name = message.split(":", 1)[1] - _handle_template_selection(context, template_name) - return (True, None) - return handled From ca241314bfa51cfb8141d4aa265c9b3aa0cbe615 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 15:02:24 +0000 Subject: [PATCH 13/53] Add prompt templates. --- .../llm/prompt_templates/anki-improve.yaml | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 ankihub/labs/llm/prompt_templates/anki-improve.yaml diff --git a/ankihub/labs/llm/prompt_templates/anki-improve.yaml b/ankihub/labs/llm/prompt_templates/anki-improve.yaml new file mode 100644 index 000000000..844fb3bce --- /dev/null +++ b/ankihub/labs/llm/prompt_templates/anki-improve.yaml @@ -0,0 +1,31 @@ +system: | + You are an expert at creating effective flashcards. Analyze the provided note and respond with an improved note with the following JSON format schema: + + $note_schema + + Follow these guidelines: + 1. Use cloze deletions to hide key information, typically one or two words at a time. + 2. Use the {{c1::hidden text}} format for cloze deletions. You can also use {{c2::hidden text}}, {{c3::hidden text}}, and so on. 
This will result in multiple cards being created for the note by Anki. Cloze deletions with the same number will be hidden on the same card, while all the other cloze deletions won't be used for this card. + 3. Most notes should only have c1, notes with c3 or more should be rare. + 4. Most notes should only have one cloze deletion. Focus on the highest yield concepts and facts. "high-yield" means the information is most important or most frequently tested on exams. + 5. Create focused cards which tests a single concept or fact. + 6. Ensure the remaining context provides enough information for the user to recall the hidden content. + + Here are some examples of well-constructed cloze deletion notes: + - Persistent {{c1::infection}} is the most common symptom of chronic sinopulmonary disease + - Chronic sinopulmonary disease commonly presents with chronic {{c1::cough}} and {{c1::sputum}} production. + - 11-deoxycorticosterone (DOC) may be converted to {{c1::corticosterone}} via the enzyme {{c2::11β-hydroxylase}}. + - 11β-hydroxylase deficiency presents with {{c1::hypokalemia}} and {{c1::hypertension}}. + - The {{c1::orbit}} is the bony socket housing the eyeball and contains muscles for {{c2::eye movement}} and {{c2::eyelid opening}}. + - The upper portion of the nasal septum is formed by the {{c1::ethmoid}} bone; the lower portion is formed by the {{c2::vomer}} bone. + - {{c1::Bone marrow}} within bones stores {{c2::fat}} and produces {{c3::blood cells}}. + - Two main side effects of osmotic laxatives include {{c1::diarrhea}} and {{c1::dehydration}}. + - Newly synthesized cGMP (from guanylyl cyclase activity) activates {{c1::protein kinase G}}, which phosphorylates specific proteins responsible for physiologic actions. + - The posterior lobe of the pituitary gland is derived from {{c1::neural}} tissue. + - The skeleton protects {{c1::internal organs}} and stores minerals like {{c2::calcium}} and {{c2::phosphate}}. 
+ + Improve the provided cloze deletion note based on the instructions above. Ensure that the card is clear, and effective for learning and recall. + +prompt: | + Here is the card to improve: + $input From f7e104224c9dcd7d03c72ca96b4f9e55523e91ee Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 15:10:28 +0000 Subject: [PATCH 14/53] Preview prompt. --- ankihub/labs/llm/llm.py | 129 ++++++++++++++++++++++++++++++++++------ 1 file changed, 111 insertions(+), 18 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index c40330deb..8c8c6b206 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -8,15 +8,87 @@ from typing import Any, Dict, List import aqt -from aqt import gui_hooks +from aqt import QFont, gui_hooks from aqt.editor import Editor -from aqt.qt import QDialog, QHBoxLayout, QPushButton, QTextEdit, QVBoxLayout +from aqt.qt import QDialog, QHBoxLayout, QLabel, QPushButton, QTextEdit, QVBoxLayout from aqt.utils import showWarning, tooltip from jinja2 import Template PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" +class PromptPreviewDialog(QDialog): + """Dialog for previewing and editing a prompt template before execution.""" + + def __init__(self, parent, template_name: str, editor: Editor) -> None: + super().__init__(parent) + self.template_name = template_name + self.editor = editor + self.template_content = self._load_template() + self._setup_ui() + + def _load_template(self) -> str: + """Load the content of the template file.""" + try: + result = subprocess.run( + ["uv", "run", "--no-project", "llm", "templates", "path"], + capture_output=True, + text=True, + check=True, + ) + templates_path = Path(result.stdout.strip()) + template_file = templates_path / f"{self.template_name}.yaml" + + if template_file.exists(): + return template_file.read_text() + else: + return "Template file not found" + except Exception as e: + return f"Error loading template: {str(e)}" + + def _setup_ui(self) -> None: + 
"""Set up the dialog UI.""" + self.setWindowTitle(f"Preview Template: {self.template_name}") + self.setMinimumWidth(800) + self.setMinimumHeight(600) + + layout = QVBoxLayout() + + # Add description label + description = QLabel("Review and edit the prompt template below:") + description.setWordWrap(True) + layout.addWidget(description) + + # Add template editor + self.template_edit = QTextEdit() + self.template_edit.setPlainText(self.template_content) + self.template_edit.setFont(QFont("Consolas")) + layout.addWidget(self.template_edit) + + # Add button row + button_layout = QHBoxLayout() + + # Execute button + execute_button = QPushButton("Execute Prompt") + execute_button.clicked.connect(self._on_execute) + button_layout.addWidget(execute_button) + + # Cancel button + cancel_button = QPushButton("Cancel") + cancel_button.clicked.connect(self.reject) + button_layout.addWidget(cancel_button) + + layout.addLayout(button_layout) + self.setLayout(layout) + + def _on_execute(self) -> None: + """Handle the execute button click.""" + modified_template = self.template_edit.toPlainText() + # TODO Save the modified template if it differs from the original + _execute_prompt_template(self.editor, self.template_name, modified_template) + self.accept() + + def _check_and_install_uv() -> None: """Check if uv is installed and install it if not.""" try: @@ -273,7 +345,15 @@ def _handle_update_note(editor: Editor, response: str, dialog: QDialog) -> None: showWarning(f"Error updating note: {str(e)}") -def _execute_prompt_template(editor: Editor, template_name: str) -> None: +def _handle_prompt_selection(editor: Editor, template_name: str) -> None: + """Handle the selection of a prompt template.""" + dialog = PromptPreviewDialog(None, template_name, editor) + dialog.exec() + + +def _execute_prompt_template( + editor: Editor, template_name: str, template_content=None +) -> None: """Execute the selected prompt template with the current note as input.""" note_content = 
_get_note_content(editor) if not note_content: @@ -288,20 +368,29 @@ def _execute_prompt_template(editor: Editor, template_name: str) -> None: escaped_content = shlex.quote(note_content) # TODO Exclude ankihub_id field note_schema = json.dumps([{field: "string" for field in editor.note.keys()}]) - result = subprocess.run( + + cmd = [ + "uv", + "run", + "--no-project", + "llm", + # TODO Allow users to choose model + # TODO Allow users to continue a conversation + # TODO Allow users to add an attachment + "-m", + "gpt-4o", + "--no-stream", + ] + + if template_content: + # If we have modified template content, pass it via stdin + cmd.extend(["-s", template_content]) + else: + # Otherwise use the template file + cmd.extend(["-t", template_name]) + + cmd.extend( [ - "uv", - "run", - "--no-project", - "llm", - # TODO Allow users to choose model - # TODO Allow users to continue a conversation - # TODO Allow users to add an attachment - "-m", - "gpt-4o", - "--no-stream", - "-t", - template_name.replace(".yaml", ""), "-p", "note_schema", shlex.quote(note_schema), @@ -309,7 +398,11 @@ def _execute_prompt_template(editor: Editor, template_name: str) -> None: "-o", "json_object", "1", - ], + ] + ) + + result = subprocess.run( + cmd, capture_output=True, text=True, check=True, @@ -333,6 +426,6 @@ def _handle_js_message( """Handle JavaScript messages for prompt template selection.""" if message.startswith("prompt-select:"): template_name = message.split(":", 1)[1] - _execute_prompt_template(context, template_name) + _handle_prompt_selection(context, template_name) return (True, None) return handled From 3d0f67ad27590ca4a2e18706a8f6280ca5f138c0 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 15:16:20 +0000 Subject: [PATCH 15/53] perf: get templates dir once. 
--- ankihub/labs/llm/llm.py | 106 +++++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 8c8c6b206..d95d824a8 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -17,18 +17,14 @@ PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" -class PromptPreviewDialog(QDialog): - """Dialog for previewing and editing a prompt template before execution.""" +class TemplateManager: + """Manages LLM template operations and caching.""" - def __init__(self, parent, template_name: str, editor: Editor) -> None: - super().__init__(parent) - self.template_name = template_name - self.editor = editor - self.template_content = self._load_template() - self._setup_ui() + _templates_path = None - def _load_template(self) -> str: - """Load the content of the template file.""" + @classmethod + def initialize(cls) -> None: + """Initialize the template manager by finding the templates directory.""" try: result = subprocess.run( ["uv", "run", "--no-project", "llm", "templates", "path"], @@ -36,15 +32,66 @@ def _load_template(self) -> str: text=True, check=True, ) - templates_path = Path(result.stdout.strip()) - template_file = templates_path / f"{self.template_name}.yaml" + cls._templates_path = Path(result.stdout.strip()) + print(f"Templates directory: {cls._templates_path}") + except subprocess.CalledProcessError as e: + print(f"Error finding templates directory: {e.stderr}") + cls._templates_path = None + except Exception as e: + print(f"Unexpected error finding templates directory: {str(e)}") + cls._templates_path = None + + @classmethod + def get_templates_path(cls): + """Get the cached templates path.""" + if cls._templates_path is None: + cls.initialize() + return cls._templates_path + + @classmethod + def get_template_content(cls, template_name: str) -> str: + """Get the content of a specific template.""" + templates_path = cls.get_templates_path() + if not 
templates_path: + return "Error: Templates directory not found" + + template_file = templates_path / f"{template_name}.yaml" + if not template_file.exists(): + return "Template file not found" - if template_file.exists(): - return template_file.read_text() - else: - return "Template file not found" + try: + return template_file.read_text() except Exception as e: - return f"Error loading template: {str(e)}" + return f"Error reading template: {str(e)}" + + @classmethod + def get_anki_templates(cls) -> List[str]: + """Get list of Anki-specific template names.""" + templates_path = cls.get_templates_path() + if not templates_path or not templates_path.exists(): + return ["No prompt templates found"] + + try: + yaml_files = [ + f.stem + for f in templates_path.glob("*.yaml") + if f.is_file() and f.stem.lower().startswith("anki") + ] + yaml_files.sort() + return yaml_files or ["No Anki templates found"] + except Exception: + return ["Error listing templates"] + + +class PromptPreviewDialog(QDialog): + """Dialog for previewing and editing a prompt template before execution.""" + + def __init__(self, parent, template_name: str, editor: Editor) -> None: + super().__init__(parent) + self.template_name = template_name + self.editor = editor + self.template_content = TemplateManager.get_template_content(template_name) + self._setup_ui() def _setup_ui(self) -> None: """Set up the dialog UI.""" @@ -150,6 +197,7 @@ def setup() -> None: """Set up the LLM prompt functionality.""" _check_and_install_uv() _install_llm() + TemplateManager.initialize() # Initialize templates path gui_hooks.editor_did_init_buttons.append(_setup_prompt_selector_button) gui_hooks.webview_did_receive_js_message.append(_handle_js_message) @@ -177,29 +225,7 @@ def _setup_prompt_selector_button(buttons: List[str], editor: Editor) -> None: def _get_prompt_templates() -> List[str]: """Get list of prompt template files from the LLM templates directory.""" - try: - # Get templates directory path from llm command 
- result = subprocess.run( - ["uv", "run", "--no-project", "llm", "templates", "path"], - capture_output=True, - text=True, - check=True, - ) - templates_path = Path(result.stdout.strip()) - - # Get yaml files that start with "Anki" (case insensitive) - yaml_files = [] - if templates_path.exists(): - yaml_files = [ - f.stem # stem gives filename without extension - for f in templates_path.glob("*.yaml") - if f.is_file() and f.stem.lower().startswith("anki") - ] - yaml_files.sort() - - return yaml_files or ["No Anki templates found"] - except (subprocess.CalledProcessError, FileNotFoundError): - return ["No prompt templates found"] + return TemplateManager.get_anki_templates() def _on_prompt_button_press(editor: Editor) -> None: From e3b30d239c5f92717a7ef8498b32ceca542de0c2 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Wed, 12 Feb 2025 15:25:23 +0000 Subject: [PATCH 16/53] Copy templates. --- ankihub/labs/llm/llm.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index d95d824a8..dd53c976b 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -21,6 +21,7 @@ class TemplateManager: """Manages LLM template operations and caching.""" _templates_path = None + _local_templates_dir = Path(__file__).parent / "prompt_templates" @classmethod def initialize(cls) -> None: @@ -34,6 +35,9 @@ def initialize(cls) -> None: ) cls._templates_path = Path(result.stdout.strip()) print(f"Templates directory: {cls._templates_path}") + + # After finding templates path, try to copy local templates + cls._copy_local_templates() except subprocess.CalledProcessError as e: print(f"Error finding templates directory: {e.stderr}") cls._templates_path = None @@ -41,6 +45,27 @@ def initialize(cls) -> None: print(f"Unexpected error finding templates directory: {str(e)}") cls._templates_path = None + @classmethod + def _copy_local_templates(cls) -> None: + """Copy local templates to 
user's templates directory if they don't exist.""" + if not cls._templates_path or not cls._local_templates_dir.exists(): + return + + try: + # Create templates directory if it doesn't exist + cls._templates_path.mkdir(parents=True, exist_ok=True) + + # Copy each template that doesn't already exist + for template_file in cls._local_templates_dir.glob("*.yaml"): + target_path = cls._templates_path / template_file.name + if not target_path.exists(): + print(f"Copying template: {template_file.name}") + target_path.write_text(template_file.read_text()) + else: + print(f"Template already exists: {template_file.name}") + except Exception as e: + print(f"Error copying templates: {str(e)}") + @classmethod def get_templates_path(cls): """Get the cached templates path.""" @@ -208,7 +233,7 @@ def _setup_prompt_selector_button(buttons: List[str], editor: Editor) -> None: icon=None, cmd=PROMPT_SELECTOR_BTN_ID, func=_on_prompt_button_press, - label="Prompts ▾", + label="✨ LLM Prompts", id=PROMPT_SELECTOR_BTN_ID, disables=False, ) From af77d3c7d4bdd6c27c537cafa574c3ad886b6ba6 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Wed, 12 Feb 2025 17:56:06 +0100 Subject: [PATCH 17/53] Add support for linux --- ankihub/labs/llm/llm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index dd53c976b..941fb822c 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -168,15 +168,15 @@ def _check_and_install_uv() -> None: print("uv is installed") except (subprocess.CalledProcessError, FileNotFoundError): try: - if platform.system() == "Darwin": # macOS + if platform.system() == "Windows": subprocess.run( - "curl -LsSf https://astral.sh/uv/install.sh | sh", + 'powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"', shell=True, check=True, ) - elif platform.system() == "Windows": + else: # macOS and Linux subprocess.run( - 'powershell -ExecutionPolicy ByPass -c "irm 
https://astral.sh/uv/install.ps1 | iex"', + "curl -LsSf https://astral.sh/uv/install.sh | sh", shell=True, check=True, ) From 299d9e9c63403f308b8c9e88bb7aa11a94ef8cb0 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Wed, 12 Feb 2025 17:56:42 +0100 Subject: [PATCH 18/53] Fix error in AddCardsDialog --- ankihub/labs/llm/llm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 941fb822c..98f35fae1 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -289,9 +289,7 @@ def _update_note_fields(editor: Editor, new_fields: Dict[str, str]) -> None: if field_name in note: note[field_name] = new_content - # Save changes and update the editor - note.flush() - editor.loadNote() + editor.loadNoteKeepingFocus() def _create_diff_html(original: str, suggested: str) -> str: From d58574ab0a029c942a73eb6028f50fdcbec6774e Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Wed, 12 Feb 2025 17:57:20 +0100 Subject: [PATCH 19/53] Fix dialog focus after updating note --- ankihub/labs/llm/llm.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 98f35fae1..681712c52 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -7,13 +7,14 @@ from pathlib import Path from typing import Any, Dict, List -import aqt from aqt import QFont, gui_hooks from aqt.editor import Editor from aqt.qt import QDialog, QHBoxLayout, QLabel, QPushButton, QTextEdit, QVBoxLayout from aqt.utils import showWarning, tooltip from jinja2 import Template +from ...gui.utils import active_window_or_mw + PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" @@ -324,7 +325,7 @@ def _show_llm_response(editor: Editor, response: str) -> None: showWarning("Invalid JSON response from LLM") return - dialog = QDialog(aqt.mw) + dialog = QDialog(active_window_or_mw()) dialog.setWindowTitle("LLM Response - Field Changes") dialog.setMinimumWidth(800) 
dialog.setMinimumHeight(600) @@ -371,7 +372,7 @@ def _show_llm_response(editor: Editor, response: str) -> None: layout.addLayout(button_layout) dialog.setLayout(layout) - dialog.exec() + dialog.open() def _handle_update_note(editor: Editor, response: str, dialog: QDialog) -> None: @@ -396,8 +397,8 @@ def _handle_update_note(editor: Editor, response: str, dialog: QDialog) -> None: def _handle_prompt_selection(editor: Editor, template_name: str) -> None: """Handle the selection of a prompt template.""" - dialog = PromptPreviewDialog(None, template_name, editor) - dialog.exec() + dialog = PromptPreviewDialog(active_window_or_mw(), template_name, editor) + dialog.open() def _execute_prompt_template( From 649677ef8d30405fd2140e890d2ba24eab516011 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Thu, 13 Feb 2025 15:21:31 +0000 Subject: [PATCH 20/53] Add secrets dialog. --- ankihub/gui/menu.py | 3 + ankihub/labs/__init__.py | 1 + ankihub/labs/gui/__init__.py | 1 + ankihub/labs/gui/menu.py | 18 ++++ ankihub/labs/secrets/__init__.py | 9 ++ ankihub/labs/secrets/dialog.py | 145 +++++++++++++++++++++++++++++++ 6 files changed, 177 insertions(+) create mode 100644 ankihub/labs/__init__.py create mode 100644 ankihub/labs/gui/__init__.py create mode 100644 ankihub/labs/gui/menu.py create mode 100644 ankihub/labs/secrets/__init__.py create mode 100644 ankihub/labs/secrets/dialog.py diff --git a/ankihub/gui/menu.py b/ankihub/gui/menu.py index 2f36591aa..d9b1f18f4 100644 --- a/ankihub/gui/menu.py +++ b/ankihub/gui/menu.py @@ -24,6 +24,7 @@ from ..addon_ankihub_client import AddonAnkiHubClient as AnkiHubClient from ..ankihub_client import AnkiHubHTTPError from ..db import ankihub_db +from ..labs.gui.menu import setup_labs_menu from ..media_import.ui import open_import_dialog from ..settings import ADDON_VERSION, config from .config_dialog import get_config_dialog_manager @@ -74,6 +75,8 @@ def refresh_ankihub_menu() -> None: _config_setup(parent=menu_state.ankihub_menu) 
_ankihub_terms_and_policy_setup(parent=menu_state.ankihub_menu) _ankihub_help_setup(parent=menu_state.ankihub_menu) + if config.labs_enabled: + setup_labs_menu(menu_state.ankihub_menu) class AnkiHubLogin(QWidget): diff --git a/ankihub/labs/__init__.py b/ankihub/labs/__init__.py new file mode 100644 index 000000000..53cca5a95 --- /dev/null +++ b/ankihub/labs/__init__.py @@ -0,0 +1 @@ +"""AnkiHub Labs module for experimental features.""" diff --git a/ankihub/labs/gui/__init__.py b/ankihub/labs/gui/__init__.py new file mode 100644 index 000000000..66a6d0906 --- /dev/null +++ b/ankihub/labs/gui/__init__.py @@ -0,0 +1 @@ +"""GUI components for AnkiHub Labs features.""" diff --git a/ankihub/labs/gui/menu.py b/ankihub/labs/gui/menu.py new file mode 100644 index 000000000..8733bdcf9 --- /dev/null +++ b/ankihub/labs/gui/menu.py @@ -0,0 +1,18 @@ +"""AnkiHub Labs menu setup.""" + +from aqt.qt import QAction, QMenu, qconnect + +from ..secrets import open_secrets_dialog + + +def setup_labs_menu(parent_menu: QMenu) -> None: + """Set up the AnkiHub Labs menu if labs are enabled.""" + # Create Labs submenu + labs_menu = QMenu("🧪 AnkiHub Labs", parent_menu) + + # Add Secrets submenu + secrets_action = QAction("🔑 Secrets", labs_menu) + qconnect(secrets_action.triggered, open_secrets_dialog) + labs_menu.addAction(secrets_action) + + parent_menu.addMenu(labs_menu) diff --git a/ankihub/labs/secrets/__init__.py b/ankihub/labs/secrets/__init__.py new file mode 100644 index 000000000..a34c5b493 --- /dev/null +++ b/ankihub/labs/secrets/__init__.py @@ -0,0 +1,9 @@ +"""Module for managing LLM API secrets.""" + +from .dialog import SecretsDialog + + +def open_secrets_dialog() -> None: + """Open the secrets management dialog.""" + dialog = SecretsDialog() + dialog.exec() diff --git a/ankihub/labs/secrets/dialog.py b/ankihub/labs/secrets/dialog.py new file mode 100644 index 000000000..950659c87 --- /dev/null +++ b/ankihub/labs/secrets/dialog.py @@ -0,0 +1,145 @@ +"""Dialog for managing LLM API 
secrets.""" + +import json +import subprocess +from pathlib import Path + +from aqt.qt import QDialog, QHBoxLayout, QLabel, QLineEdit, QPushButton, QVBoxLayout +from aqt.utils import showWarning, tooltip + +from ...gui.utils import active_window_or_mw + + +class SecretsDialog(QDialog): + """Dialog for managing LLM API secrets.""" + + def __init__(self): + super().__init__(parent=active_window_or_mw()) + self.setWindowTitle("LLM API Secrets") + self.setMinimumWidth(500) + try: + self.keys_file = self._get_keys_file_path() + except Exception as e: + showWarning(str(e)) + self.reject() + return + self.current_keys = self._load_current_keys() + self._setup_ui() + + def _get_keys_file_path(self) -> Path: + """Get the path to the keys.json file.""" + try: + result = subprocess.run( + ["uv", "run", "--no-project", "llm", "keys", "path"], + capture_output=True, + text=True, + check=True, + ) + return Path(result.stdout.strip()) + except subprocess.CalledProcessError as e: + raise Exception( + "Failed to get LLM keys path. Please run 'llm setup' in your terminal first.\n\n" + f"Error: {e.stderr}" + ) + except Exception as e: + raise Exception(f"Unexpected error getting LLM keys path: {str(e)}") + + def _load_current_keys(self) -> dict: + """Load current API keys from the keys file.""" + if not self.keys_file.exists(): + return {} + try: + return json.loads(self.keys_file.read_text()) + except json.JSONDecodeError: + return {} + + def _save_keys(self) -> None: + """Save API keys to the keys file.""" + if not self.keys_file.parent.exists(): + showWarning( + "Cannot save API keys: The llm config directory does not exist.\n" + "Please run 'llm setup' in your terminal first." 
+ ) + return + + try: + self.keys_file.write_text(json.dumps(self.current_keys, indent=2)) + tooltip("API keys saved successfully") + except (OSError, IOError) as e: + showWarning(f"Failed to save API keys: {str(e)}") + + def _setup_ui(self) -> None: + """Set up the dialog UI.""" + layout = QVBoxLayout() + + # Add description + description = QLabel( + "Enter your API keys for the LLM providers below. " + "Keys are stored securely in your local config." + ) + description.setWordWrap(True) + layout.addWidget(description) + + # Create input fields for each provider + providers = { + "gemini": "Google Gemini API Key", + "perplexity": "Perplexity API Key", + "claude": "Anthropic (Claude) API Key", + "openai": "OpenAI API Key", + } + + self.key_inputs = {} + for provider_id, provider_name in providers.items(): + # Create a horizontal layout for each provider + provider_layout = QHBoxLayout() + + # Add label + label = QLabel(f"{provider_name}:") + provider_layout.addWidget(label) + + # Add secure input field + input_field = QLineEdit() + input_field.setEchoMode(QLineEdit.EchoMode.Password) + if provider_id in self.current_keys: + input_field.setText(self.current_keys[provider_id]) + # Show a small indicator that a key exists + label.setText(f"{provider_name}: 🔑") + self.key_inputs[provider_id] = input_field + provider_layout.addWidget(input_field) + + # Add show/hide button + toggle_btn = QPushButton("👁️") + toggle_btn.setFixedWidth(30) + toggle_btn.clicked.connect( + lambda checked, field=input_field: self._toggle_password_visibility( + field + ) + ) + provider_layout.addWidget(toggle_btn) + + layout.addLayout(provider_layout) + + # Add save button + save_btn = QPushButton("Save Keys") + save_btn.clicked.connect(self._on_save) + layout.addWidget(save_btn) + + self.setLayout(layout) + + def _toggle_password_visibility(self, field: QLineEdit) -> None: + """Toggle the visibility of the password field.""" + if field.echoMode() == QLineEdit.EchoMode.Password: + 
field.setEchoMode(QLineEdit.EchoMode.Normal) + else: + field.setEchoMode(QLineEdit.EchoMode.Password) + + def _on_save(self) -> None: + """Handle saving the API keys.""" + for provider_id, input_field in self.key_inputs.items(): + key = input_field.text().strip() + if key: + self.current_keys[provider_id] = key + elif provider_id in self.current_keys: + del self.current_keys[provider_id] + + self._save_keys() From 63f542123305edf688f6d0cbf9bd5491b988ba3b Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Thu, 13 Feb 2025 15:53:51 +0000 Subject: [PATCH 21/53] Improve the prompt preview dialog. --- .pre-commit-config.yaml | 6 + ankihub/labs/llm/llm.py | 118 ++++++++++++++++-- .../llm/prompt_templates/anki-fact-check.yaml | 5 + .../llm/prompt_templates/anki-improve.yaml | 1 + 4 files changed, 119 insertions(+), 11 deletions(-) create mode 100644 ankihub/labs/llm/prompt_templates/anki-fact-check.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index da8f4fca3..92fee90a2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,6 +27,12 @@ repos: hooks: - id: pycln + - repo: https://github.com/PyCQA/autoflake + rev: v2.3.1 + hooks: + - id: autoflake + args: [--remove-all-unused-imports, --in-place] + - repo: local hooks: - id: no_absolute_imports_from_ankihub diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 681712c52..44693918d 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -7,9 +7,20 @@ from pathlib import Path from typing import Any, Dict, List +import yaml from aqt import QFont, gui_hooks from aqt.editor import Editor -from aqt.qt import QDialog, QHBoxLayout, QLabel, QPushButton, QTextEdit, QVBoxLayout +from aqt.qt import ( + QDialog, + QHBoxLayout, + QLabel, + QMessageBox, + QPushButton, + QScrollArea, + QTextEdit, + QVBoxLayout, + QWidget, +) from aqt.utils import showWarning, tooltip from jinja2 import Template @@ -108,6 +119,32 @@ def get_anki_templates(cls) -> List[str]: except 
Exception: return ["Error listing templates"] + @classmethod + def save_template(cls, template_name: str, content: str) -> None: + """Save a template with the given content. + + Args: + template_name: Name of the template without .yaml extension + content: YAML content to save + + Raises: + Exception: If templates directory is not found or if saving fails + """ + templates_path = cls.get_templates_path() + if not templates_path: + raise Exception("Templates directory not found") + + try: + # Validate YAML content before saving + yaml.safe_load(content) + + template_file = templates_path / f"{template_name}.yaml" + template_file.write_text(content) + except yaml.YAMLError as e: + raise Exception(f"Invalid YAML content: {str(e)}") + except Exception as e: + raise Exception(f"Failed to save template: {str(e)}") + class PromptPreviewDialog(QDialog): """Dialog for previewing and editing a prompt template before execution.""" @@ -117,10 +154,18 @@ def __init__(self, parent, template_name: str, editor: Editor) -> None: self.template_name = template_name self.editor = editor self.template_content = TemplateManager.get_template_content(template_name) + try: + self.yaml_data = yaml.safe_load(self.template_content) + if not isinstance(self.yaml_data, dict): + self.yaml_data = {} + except yaml.YAMLError: + self.yaml_data = {} + + self.section_editors = {} # Store references to editors for each section self._setup_ui() def _setup_ui(self) -> None: - """Set up the dialog UI.""" + """Set up the dialog UI with all YAML sections visible.""" self.setWindowTitle(f"Preview Template: {self.template_name}") self.setMinimumWidth(800) self.setMinimumHeight(600) @@ -128,15 +173,46 @@ def _setup_ui(self) -> None: layout = QVBoxLayout() # Add description label - description = QLabel("Review and edit the prompt template below:") + description = QLabel("Edit the template sections below:") description.setWordWrap(True) layout.addWidget(description) - # Add template editor - self.template_edit = 
QTextEdit() - self.template_edit.setPlainText(self.template_content) - self.template_edit.setFont(QFont("Consolas")) - layout.addWidget(self.template_edit) + # Create a scroll area for sections + scroll_area = QWidget() + scroll_layout = QVBoxLayout() + scroll_area.setLayout(scroll_layout) + + # Create an editor for each YAML section + for key, value in self.yaml_data.items(): + section_widget = QWidget() + section_layout = QVBoxLayout() + + # Add section label + label = QLabel(f"{key}:") + label.setFont(QFont("Consolas")) + section_layout.addWidget(label) + + # Add section editor + editor = QTextEdit() + editor.setPlainText(str(value)) + editor.setFont(QFont("Consolas")) + editor.setMinimumHeight(100) # Ensure minimum visibility + section_layout.addWidget(editor) + + # Store reference to editor + self.section_editors[key] = editor + + section_widget.setLayout(section_layout) + scroll_layout.addWidget(section_widget) + + # Add spacer at the bottom to push content up + scroll_layout.addStretch() + + # Create a scroll area container + scroll_container = QScrollArea() + scroll_container.setWidget(scroll_area) + scroll_container.setWidgetResizable(True) + layout.addWidget(scroll_container) # Add button row button_layout = QHBoxLayout() @@ -146,6 +222,11 @@ def _setup_ui(self) -> None: execute_button.clicked.connect(self._on_execute) button_layout.addWidget(execute_button) + # Save button + save_button = QPushButton("Save Template") + save_button.clicked.connect(self._on_save) + button_layout.addWidget(save_button) + # Cancel button cancel_button = QPushButton("Cancel") cancel_button.clicked.connect(self.reject) @@ -154,11 +235,26 @@ def _setup_ui(self) -> None: layout.addLayout(button_layout) self.setLayout(layout) + def _get_yaml_content(self) -> str: + """Generate valid YAML content from the current field values.""" + data = { + key: editor.toPlainText() for key, editor in self.section_editors.items() + } + return yaml.safe_dump(data, default_flow_style=False, 
sort_keys=False) + + def _on_save(self) -> None: + """Save the modified template.""" + try: + modified_content = self._get_yaml_content() + TemplateManager.save_template(self.template_name, modified_content) + QMessageBox.information(self, "Success", "Template saved successfully!") + except Exception as e: + QMessageBox.critical(self, "Error", f"Failed to save template: {str(e)}") + def _on_execute(self) -> None: """Handle the execute button click.""" - modified_template = self.template_edit.toPlainText() - # TODO Save the modified template if it differs from the original - _execute_prompt_template(self.editor, self.template_name, modified_template) + modified_content = self._get_yaml_content() + _execute_prompt_template(self.editor, self.template_name, modified_content) self.accept() diff --git a/ankihub/labs/llm/prompt_templates/anki-fact-check.yaml b/ankihub/labs/llm/prompt_templates/anki-fact-check.yaml new file mode 100644 index 000000000..f4c58e5a5 --- /dev/null +++ b/ankihub/labs/llm/prompt_templates/anki-fact-check.yaml @@ -0,0 +1,5 @@ +model: perplexity +system: "Your job is to fact check the following bit of information. Respond succinctly, with no more than a single paragraph explaining why the provided information is true or false." +prompt: | + Here is the card to improve: + $input diff --git a/ankihub/labs/llm/prompt_templates/anki-improve.yaml b/ankihub/labs/llm/prompt_templates/anki-improve.yaml index 844fb3bce..6108eb7db 100644 --- a/ankihub/labs/llm/prompt_templates/anki-improve.yaml +++ b/ankihub/labs/llm/prompt_templates/anki-improve.yaml @@ -29,3 +29,4 @@ system: | prompt: | Here is the card to improve: $input +model: gpt-4o From c8da4c2f8b6ade06ed8e71e747fdb5d9c4563afe Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Thu, 13 Feb 2025 15:59:30 +0000 Subject: [PATCH 22/53] subproject commit. 
--- ankihub/gui/ankiaddonconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ankihub/gui/ankiaddonconfig b/ankihub/gui/ankiaddonconfig index 247ebd050..1c45c6e7f 160000 --- a/ankihub/gui/ankiaddonconfig +++ b/ankihub/gui/ankiaddonconfig @@ -1 +1 @@ -Subproject commit 247ebd050222f6b6487ddb37c7ac322eb63521e3 +Subproject commit 1c45c6e7f2075e3338b21bcf99430f9822ccc7cf From f5a7106a872a6ab7680077052c7dfcd1a33aa42c Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Thu, 13 Feb 2025 17:21:11 +0000 Subject: [PATCH 23/53] Move import --- ankihub/entry_point.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ankihub/entry_point.py b/ankihub/entry_point.py index b540d8b43..a82596b77 100644 --- a/ankihub/entry_point.py +++ b/ankihub/entry_point.py @@ -29,7 +29,6 @@ from .gui.media_sync import media_sync from .gui.menu import menu_state, refresh_ankihub_menu, setup_ankihub_menu from .gui.operations.ankihub_sync import setup_full_sync_patch -from .labs.llm import llm from .main.note_deletion import handle_notes_deleted_from_webapp from .main.utils import modify_note_type_templates from .settings import ( @@ -240,6 +239,8 @@ def _general_setup(): ) if config.labs_enabled: + from .labs.llm import llm + llm.setup() LOGGER.info("Set up LLM prompt functionality.") From 1bf8f150b20e3ae5b4975ebf9e0e8bc18d7eb578 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Thu, 13 Feb 2025 17:55:44 +0000 Subject: [PATCH 24/53] Add yaml lib. 
--- ankihub/lib/PyYAML-6.0.2.dist-info/INSTALLER | 1 + ankihub/lib/PyYAML-6.0.2.dist-info/LICENSE | 20 + ankihub/lib/PyYAML-6.0.2.dist-info/METADATA | 46 + ankihub/lib/PyYAML-6.0.2.dist-info/RECORD | 44 + ankihub/lib/PyYAML-6.0.2.dist-info/REQUESTED | 0 ankihub/lib/PyYAML-6.0.2.dist-info/WHEEL | 5 + .../lib/PyYAML-6.0.2.dist-info/top_level.txt | 2 + ankihub/lib/_yaml/__init__.py | 33 + ankihub/lib/yaml/__init__.py | 390 +++++ ankihub/lib/yaml/composer.py | 139 ++ ankihub/lib/yaml/constructor.py | 748 +++++++++ ankihub/lib/yaml/cyaml.py | 101 ++ ankihub/lib/yaml/dumper.py | 62 + ankihub/lib/yaml/emitter.py | 1137 +++++++++++++ ankihub/lib/yaml/error.py | 75 + ankihub/lib/yaml/events.py | 86 + ankihub/lib/yaml/loader.py | 63 + ankihub/lib/yaml/nodes.py | 49 + ankihub/lib/yaml/parser.py | 589 +++++++ ankihub/lib/yaml/reader.py | 185 +++ ankihub/lib/yaml/representer.py | 389 +++++ ankihub/lib/yaml/resolver.py | 227 +++ ankihub/lib/yaml/scanner.py | 1435 +++++++++++++++++ ankihub/lib/yaml/serializer.py | 111 ++ ankihub/lib/yaml/tokens.py | 104 ++ 25 files changed, 6041 insertions(+) create mode 100644 ankihub/lib/PyYAML-6.0.2.dist-info/INSTALLER create mode 100644 ankihub/lib/PyYAML-6.0.2.dist-info/LICENSE create mode 100644 ankihub/lib/PyYAML-6.0.2.dist-info/METADATA create mode 100644 ankihub/lib/PyYAML-6.0.2.dist-info/RECORD create mode 100644 ankihub/lib/PyYAML-6.0.2.dist-info/REQUESTED create mode 100644 ankihub/lib/PyYAML-6.0.2.dist-info/WHEEL create mode 100644 ankihub/lib/PyYAML-6.0.2.dist-info/top_level.txt create mode 100644 ankihub/lib/_yaml/__init__.py create mode 100644 ankihub/lib/yaml/__init__.py create mode 100644 ankihub/lib/yaml/composer.py create mode 100644 ankihub/lib/yaml/constructor.py create mode 100644 ankihub/lib/yaml/cyaml.py create mode 100644 ankihub/lib/yaml/dumper.py create mode 100644 ankihub/lib/yaml/emitter.py create mode 100644 ankihub/lib/yaml/error.py create mode 100644 ankihub/lib/yaml/events.py create mode 100644 
ankihub/lib/yaml/loader.py create mode 100644 ankihub/lib/yaml/nodes.py create mode 100644 ankihub/lib/yaml/parser.py create mode 100644 ankihub/lib/yaml/reader.py create mode 100644 ankihub/lib/yaml/representer.py create mode 100644 ankihub/lib/yaml/resolver.py create mode 100644 ankihub/lib/yaml/scanner.py create mode 100644 ankihub/lib/yaml/serializer.py create mode 100644 ankihub/lib/yaml/tokens.py diff --git a/ankihub/lib/PyYAML-6.0.2.dist-info/INSTALLER b/ankihub/lib/PyYAML-6.0.2.dist-info/INSTALLER new file mode 100644 index 000000000..a1b589e38 --- /dev/null +++ b/ankihub/lib/PyYAML-6.0.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/ankihub/lib/PyYAML-6.0.2.dist-info/LICENSE b/ankihub/lib/PyYAML-6.0.2.dist-info/LICENSE new file mode 100644 index 000000000..2f1b8e15e --- /dev/null +++ b/ankihub/lib/PyYAML-6.0.2.dist-info/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2017-2021 Ingy döt Net +Copyright (c) 2006-2016 Kirill Simonov + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/ankihub/lib/PyYAML-6.0.2.dist-info/METADATA b/ankihub/lib/PyYAML-6.0.2.dist-info/METADATA new file mode 100644 index 000000000..db029b770 --- /dev/null +++ b/ankihub/lib/PyYAML-6.0.2.dist-info/METADATA @@ -0,0 +1,46 @@ +Metadata-Version: 2.1 +Name: PyYAML +Version: 6.0.2 +Summary: YAML parser and emitter for Python +Home-page: https://pyyaml.org/ +Download-URL: https://pypi.org/project/PyYAML/ +Author: Kirill Simonov +Author-email: xi@resolvent.net +License: MIT +Project-URL: Bug Tracker, https://github.com/yaml/pyyaml/issues +Project-URL: CI, https://github.com/yaml/pyyaml/actions +Project-URL: Documentation, https://pyyaml.org/wiki/PyYAMLDocumentation +Project-URL: Mailing lists, http://lists.sourceforge.net/lists/listinfo/yaml-core +Project-URL: Source Code, https://github.com/yaml/pyyaml +Platform: Any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Cython +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Processing :: Markup +Requires-Python: >=3.8 +License-File: LICENSE + +YAML is a data serialization format designed for human readability +and interaction with scripting languages. PyYAML is a YAML parser +and emitter for Python. 
+ +PyYAML features a complete YAML 1.1 parser, Unicode support, pickle +support, capable extension API, and sensible error messages. PyYAML +supports standard YAML tags and provides Python-specific tags that +allow to represent an arbitrary Python object. + +PyYAML is applicable for a broad range of tasks from complex +configuration files to object serialization and persistence. diff --git a/ankihub/lib/PyYAML-6.0.2.dist-info/RECORD b/ankihub/lib/PyYAML-6.0.2.dist-info/RECORD new file mode 100644 index 000000000..9828953f4 --- /dev/null +++ b/ankihub/lib/PyYAML-6.0.2.dist-info/RECORD @@ -0,0 +1,44 @@ +PyYAML-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +PyYAML-6.0.2.dist-info/LICENSE,sha256=jTko-dxEkP1jVwfLiOsmvXZBAqcoKVQwfT5RZ6V36KQ,1101 +PyYAML-6.0.2.dist-info/METADATA,sha256=9-odFB5seu4pGPcEv7E8iyxNF51_uKnaNGjLAhz2lto,2060 +PyYAML-6.0.2.dist-info/RECORD,, +PyYAML-6.0.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +PyYAML-6.0.2.dist-info/WHEEL,sha256=LFVzND6nAdWMS-norKkn42oL86bk-j1PiLvh1xzptX0,110 +PyYAML-6.0.2.dist-info/top_level.txt,sha256=rpj0IVMTisAjh_1vG3Ccf9v5jpCQwAz6cD1IVU5ZdhQ,11 +_yaml/__init__.py,sha256=04Ae_5osxahpJHa3XBZUAf4wi6XX32gR8D6X6p64GEA,1402 +_yaml/__pycache__/__init__.cpython-311.pyc,, +yaml/__init__.py,sha256=N35S01HMesFTe0aRRMWkPj0Pa8IEbHpE9FK7cr5Bdtw,12311 +yaml/__pycache__/__init__.cpython-311.pyc,, +yaml/__pycache__/composer.cpython-311.pyc,, +yaml/__pycache__/constructor.cpython-311.pyc,, +yaml/__pycache__/cyaml.cpython-311.pyc,, +yaml/__pycache__/dumper.cpython-311.pyc,, +yaml/__pycache__/emitter.cpython-311.pyc,, +yaml/__pycache__/error.cpython-311.pyc,, +yaml/__pycache__/events.cpython-311.pyc,, +yaml/__pycache__/loader.cpython-311.pyc,, +yaml/__pycache__/nodes.cpython-311.pyc,, +yaml/__pycache__/parser.cpython-311.pyc,, +yaml/__pycache__/reader.cpython-311.pyc,, +yaml/__pycache__/representer.cpython-311.pyc,, +yaml/__pycache__/resolver.cpython-311.pyc,, 
+yaml/__pycache__/scanner.cpython-311.pyc,, +yaml/__pycache__/serializer.cpython-311.pyc,, +yaml/__pycache__/tokens.cpython-311.pyc,, +yaml/_yaml.cpython-311-darwin.so,sha256=YdahBTjS8KitV8Lm6bzs2ON1yRZtfHTI-UguNFhwElI,359272 +yaml/composer.py,sha256=_Ko30Wr6eDWUeUpauUGT3Lcg9QPBnOPVlTnIMRGJ9FM,4883 +yaml/constructor.py,sha256=kNgkfaeLUkwQYY_Q6Ff1Tz2XVw_pG1xVE9Ak7z-viLA,28639 +yaml/cyaml.py,sha256=6ZrAG9fAYvdVe2FK_w0hmXoG7ZYsoYUwapG8CiC72H0,3851 +yaml/dumper.py,sha256=PLctZlYwZLp7XmeUdwRuv4nYOZ2UBnDIUy8-lKfLF-o,2837 +yaml/emitter.py,sha256=jghtaU7eFwg31bG0B7RZea_29Adi9CKmXq_QjgQpCkQ,43006 +yaml/error.py,sha256=Ah9z-toHJUbE9j-M8YpxgSRM5CgLCcwVzJgLLRF2Fxo,2533 +yaml/events.py,sha256=50_TksgQiE4up-lKo_V-nBy-tAIxkIPQxY5qDhKCeHw,2445 +yaml/loader.py,sha256=UVa-zIqmkFSCIYq_PgSGm4NSJttHY2Rf_zQ4_b1fHN0,2061 +yaml/nodes.py,sha256=gPKNj8pKCdh2d4gr3gIYINnPOaOxGhJAUiYhGRnPE84,1440 +yaml/parser.py,sha256=ilWp5vvgoHFGzvOZDItFoGjD6D42nhlZrZyjAwa0oJo,25495 +yaml/reader.py,sha256=0dmzirOiDG4Xo41RnuQS7K9rkY3xjHiVasfDMNTqCNw,6794 +yaml/representer.py,sha256=IuWP-cAW9sHKEnS0gCqSa894k1Bg4cgTxaDwIcbRQ-Y,14190 +yaml/resolver.py,sha256=9L-VYfm4mWHxUD1Vg4X7rjDRK_7VZd6b92wzq7Y2IKY,9004 +yaml/scanner.py,sha256=YEM3iLZSaQwXcQRg2l2R4MdT0zGP2F9eHkKGKnHyWQY,51279 +yaml/serializer.py,sha256=ChuFgmhU01hj4xgI8GaKv6vfM2Bujwa9i7d2FAHj7cA,4165 +yaml/tokens.py,sha256=lTQIzSVw8Mg9wv459-TjiOQe6wVziqaRlqX2_89rp54,2573 diff --git a/ankihub/lib/PyYAML-6.0.2.dist-info/REQUESTED b/ankihub/lib/PyYAML-6.0.2.dist-info/REQUESTED new file mode 100644 index 000000000..e69de29bb diff --git a/ankihub/lib/PyYAML-6.0.2.dist-info/WHEEL b/ankihub/lib/PyYAML-6.0.2.dist-info/WHEEL new file mode 100644 index 000000000..cdc482d11 --- /dev/null +++ b/ankihub/lib/PyYAML-6.0.2.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.44.0) +Root-Is-Purelib: false +Tag: cp311-cp311-macosx_11_0_arm64 + diff --git a/ankihub/lib/PyYAML-6.0.2.dist-info/top_level.txt 
b/ankihub/lib/PyYAML-6.0.2.dist-info/top_level.txt new file mode 100644 index 000000000..e6475e911 --- /dev/null +++ b/ankihub/lib/PyYAML-6.0.2.dist-info/top_level.txt @@ -0,0 +1,2 @@ +_yaml +yaml diff --git a/ankihub/lib/_yaml/__init__.py b/ankihub/lib/_yaml/__init__.py new file mode 100644 index 000000000..7baa8c4b6 --- /dev/null +++ b/ankihub/lib/_yaml/__init__.py @@ -0,0 +1,33 @@ +# This is a stub package designed to roughly emulate the _yaml +# extension module, which previously existed as a standalone module +# and has been moved into the `yaml` package namespace. +# It does not perfectly mimic its old counterpart, but should get +# close enough for anyone who's relying on it even when they shouldn't. +import yaml + +# in some circumstances, the yaml module we imoprted may be from a different version, so we need +# to tread carefully when poking at it here (it may not have the attributes we expect) +if not getattr(yaml, '__with_libyaml__', False): + from sys import version_info + + exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError + raise exc("No module named '_yaml'") +else: + from yaml._yaml import * + import warnings + warnings.warn( + 'The _yaml extension module is now located at yaml._yaml' + ' and its location is subject to change. To use the' + ' LibYAML-based parser and emitter, import from `yaml`:' + ' `from yaml import CLoader as Loader, CDumper as Dumper`.', + DeprecationWarning + ) + del warnings + # Don't `del yaml` here because yaml is actually an existing + # namespace member of _yaml. + +__name__ = '_yaml' +# If the module is top-level (i.e. not a part of any specific package) +# then the attribute should be set to ''. 
+# https://docs.python.org/3.8/library/types.html +__package__ = '' diff --git a/ankihub/lib/yaml/__init__.py b/ankihub/lib/yaml/__init__.py new file mode 100644 index 000000000..2ec4f203c --- /dev/null +++ b/ankihub/lib/yaml/__init__.py @@ -0,0 +1,390 @@ + +from .error import * + +from .tokens import * +from .events import * +from .nodes import * + +from .loader import * +from .dumper import * + +__version__ = '6.0.2' +try: + from .cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +import io + +#------------------------------------------------------------------------------ +# XXX "Warnings control" is now deprecated. Leaving in the API function to not +# break code that uses it. +#------------------------------------------------------------------------------ +def warnings(settings=None): + if settings is None: + return {} + +#------------------------------------------------------------------------------ +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. 
+ """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + +def load(stream, Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + +def load_all(stream, Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + +def full_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + + Resolve all tags except those known to be + unsafe on untrusted input. + """ + return load(stream, FullLoader) + +def full_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + + Resolve all tags except those known to be + unsafe on untrusted input. + """ + return load_all(stream, FullLoader) + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + + Resolve only basic YAML tags. This is known + to be safe for untrusted input. + """ + return load(stream, SafeLoader) + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + + Resolve only basic YAML tags. This is known + to be safe for untrusted input. + """ + return load_all(stream, SafeLoader) + +def unsafe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + + Resolve all tags, even those known to be + unsafe on untrusted input. + """ + return load(stream, UnsafeLoader) + +def unsafe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. 
+ + Resolve all tags, even those known to be + unsafe on untrusted input. + """ + return load_all(stream, UnsafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + stream = io.StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. 
+ """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. 
+ """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_implicit_resolver(tag, regexp, first=None, + Loader=None, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + if Loader is None: + loader.Loader.add_implicit_resolver(tag, regexp, first) + loader.FullLoader.add_implicit_resolver(tag, regexp, first) + loader.UnsafeLoader.add_implicit_resolver(tag, regexp, first) + else: + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + +def add_path_resolver(tag, path, kind=None, Loader=None, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + if Loader is None: + loader.Loader.add_path_resolver(tag, path, kind) + loader.FullLoader.add_path_resolver(tag, path, kind) + loader.UnsafeLoader.add_path_resolver(tag, path, kind) + else: + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + +def add_constructor(tag, constructor, Loader=None): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + if Loader is None: + loader.Loader.add_constructor(tag, constructor) + loader.FullLoader.add_constructor(tag, constructor) + loader.UnsafeLoader.add_constructor(tag, constructor) + else: + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=None): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. 
+ Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + if Loader is None: + loader.Loader.add_multi_constructor(tag_prefix, multi_constructor) + loader.FullLoader.add_multi_constructor(tag_prefix, multi_constructor) + loader.UnsafeLoader.add_multi_constructor(tag_prefix, multi_constructor) + else: + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + if isinstance(cls.yaml_loader, list): + for loader in cls.yaml_loader: + loader.add_constructor(cls.yaml_tag, cls.from_yaml) + else: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(metaclass=YAMLObjectMetaclass): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. 
+ """ + + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = [Loader, FullLoader, UnsafeLoader] + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + @classmethod + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + + @classmethod + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + diff --git a/ankihub/lib/yaml/composer.py b/ankihub/lib/yaml/composer.py new file mode 100644 index 000000000..6d15cb40e --- /dev/null +++ b/ankihub/lib/yaml/composer.py @@ -0,0 +1,139 @@ + +__all__ = ['Composer', 'ComposerError'] + +from .error import MarkedYAMLError +from .events import * +from .nodes import * + +class ComposerError(MarkedYAMLError): + pass + +class Composer: + + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError("expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. 
+ self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. + self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + anchor = event.anchor + if anchor not in self.anchors: + raise ComposerError(None, None, "found undefined alias %r" + % anchor, event.start_mark) + return self.anchors[anchor] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: + if anchor in self.anchors: + raise ComposerError("found duplicate anchor %r; first occurrence" + % anchor, self.anchors[anchor].start_mark, + "second occurrence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == '!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = 
self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + #key_event = self.peek_event() + item_key = self.compose_node(node, None) + #if item_key in node.value: + # raise ComposerError("while composing a mapping", start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + #node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + diff --git a/ankihub/lib/yaml/constructor.py b/ankihub/lib/yaml/constructor.py new file mode 100644 index 000000000..619acd307 --- /dev/null +++ b/ankihub/lib/yaml/constructor.py @@ -0,0 +1,748 @@ + +__all__ = [ + 'BaseConstructor', + 'SafeConstructor', + 'FullConstructor', + 'UnsafeConstructor', + 'Constructor', + 'ConstructorError' +] + +from .error import * +from .nodes import * + +import collections.abc, datetime, base64, binascii, re, sys, types + +class ConstructorError(MarkedYAMLError): + pass + +class BaseConstructor: + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? 
+ return self.check_node() + + def check_state_key(self, key): + """Block special attributes/methods from being set in a newly created + object, to prevent user-controlled methods from being called during + deserialization""" + if self.get_state_keys_blacklist_regexp().match(key): + raise ConstructorError(None, None, + "blacklisted key '%s' in instance state found" % (key,), None) + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. + node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.recursive_objects: + raise ConstructorError(None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if tag_prefix is not None and node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + 
constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = next(generator) + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError(None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError(None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + if not isinstance(key, collections.abc.Hashable): + raise ConstructorError("while constructing a mapping", node.start_mark, + "found unhashable key", key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + 
node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + @classmethod + def add_constructor(cls, tag, constructor): + if not 'yaml_constructors' in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + + @classmethod + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + +class SafeConstructor(BaseConstructor): + + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == 'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return super().construct_scalar(node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == 'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError("while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 
'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return super().construct_mapping(node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + bool_values = { + 'yes': True, + 'no': False, + 'true': True, + 'false': False, + 'on': True, + 'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = self.construct_scalar(node) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: + digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). 
+
+    def construct_yaml_float(self, node):
+        value = self.construct_scalar(node)
+        value = value.replace('_', '').lower()
+        sign = +1
+        if value[0] == '-':
+            sign = -1
+        if value[0] in '+-':
+            value = value[1:]
+        if value == '.inf':
+            return sign*self.inf_value
+        elif value == '.nan':
+            return self.nan_value
+        elif ':' in value:
+            digits = [float(part) for part in value.split(':')]
+            digits.reverse()
+            base = 1
+            value = 0.0
+            for digit in digits:
+                value += digit*base
+                base *= 60
+            return sign*value
+        else:
+            return sign*float(value)
+
+    def construct_yaml_binary(self, node):
+        try:
+            value = self.construct_scalar(node).encode('ascii')
+        except UnicodeEncodeError as exc:
+            raise ConstructorError(None, None,
+                    "failed to convert base64 data into ascii: %s" % exc,
+                    node.start_mark)
+        try:
+            if hasattr(base64, 'decodebytes'):
+                return base64.decodebytes(value)
+            else:
+                return base64.decodestring(value)
+        except binascii.Error as exc:
+            raise ConstructorError(None, None,
+                    "failed to decode base64 data: %s" % exc, node.start_mark)
+
+    timestamp_regexp = re.compile(
+            r'''^(?P<year>[0-9][0-9][0-9][0-9])
+                -(?P<month>[0-9][0-9]?)
+                -(?P<day>[0-9][0-9]?)
+                (?:(?:[Tt]|[ \t]+)
+                (?P<hour>[0-9][0-9]?)
+                :(?P<minute>[0-9][0-9])
+                :(?P<second>[0-9][0-9])
+                (?:\.(?P<fraction>[0-9]*))?
+                (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
+                (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)
+
+    def construct_yaml_timestamp(self, node):
+        value = self.construct_scalar(node)
+        match = self.timestamp_regexp.match(node.value)
+        values = match.groupdict()
+        year = int(values['year'])
+        month = int(values['month'])
+        day = int(values['day'])
+        if not values['hour']:
+            return datetime.date(year, month, day)
+        hour = int(values['hour'])
+        minute = int(values['minute'])
+        second = int(values['second'])
+        fraction = 0
+        tzinfo = None
+        if values['fraction']:
+            fraction = values['fraction'][:6]
+            while len(fraction) < 6:
+                fraction += '0'
+            fraction = int(fraction)
+        if values['tz_sign']:
+            tz_hour = int(values['tz_hour'])
+            tz_minute = int(values['tz_minute'] or 0)
+            delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
+            if values['tz_sign'] == '-':
+                delta = -delta
+            tzinfo = datetime.timezone(delta)
+        elif values['tz']:
+            tzinfo = datetime.timezone.utc
+        return datetime.datetime(year, month, day, hour, minute, second, fraction,
+                                 tzinfo=tzinfo)
+
+    def construct_yaml_omap(self, node):
+        # Note: we do not check for duplicate keys, because it's too
+        # CPU-expensive.
+ omap = [] + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + omap.append((key, value)) + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. + pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + def construct_yaml_seq(self, node): + data = [] + yield data + 
data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError(None, None, + "could not determine a constructor for the tag %r" % node.tag, + node.start_mark) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor(None, + SafeConstructor.construct_undefined) + +class FullConstructor(SafeConstructor): + # 'extend' is 
blacklisted because it is used by + # construct_python_object_apply to add `listitems` to a newly generate + # python instance + def get_state_keys_blacklist(self): + return ['^extend$', '^__.*__$'] + + def get_state_keys_blacklist_regexp(self): + if not hasattr(self, 'state_keys_blacklist_regexp'): + self.state_keys_blacklist_regexp = re.compile('(' + '|'.join(self.get_state_keys_blacklist()) + ')') + return self.state_keys_blacklist_regexp + + def construct_python_str(self, node): + return self.construct_scalar(node) + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + def construct_python_bytes(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + def construct_python_long(self, node): + return self.construct_yaml_int(node) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark, unsafe=False): + if not name: + raise ConstructorError("while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + if unsafe: + try: + __import__(name) + except ImportError as exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name, exc), mark) + if name not in sys.modules: + raise ConstructorError("while constructing a Python module", mark, + "module %r is not imported" % name, mark) + return sys.modules[name] + + def find_python_name(self, name, mark, unsafe=False): + if 
not name: + raise ConstructorError("while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if '.' in name: + module_name, object_name = name.rsplit('.', 1) + else: + module_name = 'builtins' + object_name = name + if unsafe: + try: + __import__(module_name) + except ImportError as exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name, exc), mark) + if module_name not in sys.modules: + raise ConstructorError("while constructing a Python object", mark, + "module %r is not imported" % module_name, mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError("while constructing a Python object", mark, + "cannot find %r in the module %r" + % (object_name, module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False, unsafe=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if not (unsafe or isinstance(cls, type)): + raise ConstructorError("while constructing a Python instance", node.start_mark, + "expected a class, but found %r" % type(cls), + node.start_mark) + if newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return 
cls(*args, **kwds) + + def set_python_instance_state(self, instance, state, unsafe=False): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + if not unsafe and state: + for key in state.keys(): + self.check_state_key(key) + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + if not unsafe: + self.check_state_key(key) + setattr(instance, key, value) + + def construct_python_object(self, suffix, node): + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. 
+ if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/none', + FullConstructor.construct_yaml_null) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/bool', + FullConstructor.construct_yaml_bool) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/str', + FullConstructor.construct_python_str) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/unicode', + FullConstructor.construct_python_unicode) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/bytes', + FullConstructor.construct_python_bytes) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/int', + FullConstructor.construct_yaml_int) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/long', + FullConstructor.construct_python_long) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/float', + FullConstructor.construct_yaml_float) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/complex', + FullConstructor.construct_python_complex) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/list', + FullConstructor.construct_yaml_seq) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/tuple', + 
FullConstructor.construct_python_tuple) + +FullConstructor.add_constructor( + 'tag:yaml.org,2002:python/dict', + FullConstructor.construct_yaml_map) + +FullConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/name:', + FullConstructor.construct_python_name) + +class UnsafeConstructor(FullConstructor): + + def find_python_module(self, name, mark): + return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True) + + def find_python_name(self, name, mark): + return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True) + + def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False): + return super(UnsafeConstructor, self).make_python_instance( + suffix, node, args, kwds, newobj, unsafe=True) + + def set_python_instance_state(self, instance, state): + return super(UnsafeConstructor, self).set_python_instance_state( + instance, state, unsafe=True) + +UnsafeConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/module:', + UnsafeConstructor.construct_python_module) + +UnsafeConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object:', + UnsafeConstructor.construct_python_object) + +UnsafeConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/new:', + UnsafeConstructor.construct_python_object_new) + +UnsafeConstructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/apply:', + UnsafeConstructor.construct_python_object_apply) + +# Constructor is same as UnsafeConstructor. Need to leave this in place in case +# people have extended it directly. 
+class Constructor(UnsafeConstructor): + pass diff --git a/ankihub/lib/yaml/cyaml.py b/ankihub/lib/yaml/cyaml.py new file mode 100644 index 000000000..0c2134587 --- /dev/null +++ b/ankihub/lib/yaml/cyaml.py @@ -0,0 +1,101 @@ + +__all__ = [ + 'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper' +] + +from yaml._yaml import CParser, CEmitter + +from .constructor import * + +from .serializer import * +from .representer import * + +from .resolver import * + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class CSafeLoader(CParser, SafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class CFullLoader(CParser, FullConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + FullConstructor.__init__(self) + Resolver.__init__(self) + +class CUnsafeLoader(CParser, UnsafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + UnsafeConstructor.__init__(self) + Resolver.__init__(self) + +class CLoader(CParser, Constructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, 
tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + +class CDumper(CEmitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + diff --git a/ankihub/lib/yaml/dumper.py b/ankihub/lib/yaml/dumper.py new file mode 100644 index 000000000..6aadba551 --- /dev/null +++ b/ankihub/lib/yaml/dumper.py @@ -0,0 +1,62 @@ + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from .emitter import * +from .serializer import * +from .representer import * +from .resolver import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, 
BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=False, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None, sort_keys=True): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, 
explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style, sort_keys=sort_keys) + Resolver.__init__(self) + diff --git a/ankihub/lib/yaml/emitter.py b/ankihub/lib/yaml/emitter.py new file mode 100644 index 000000000..a664d0111 --- /dev/null +++ b/ankihub/lib/yaml/emitter.py @@ -0,0 +1,1137 @@ + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from .error import YAMLError +from .events import * + +class EmitterError(YAMLError): + pass + +class ScalarAnalysis: + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + +class Emitter: + + DEFAULT_TAG_PREFIXES = { + '!' : '!', + 'tag:yaml.org,2002:' : '!!', + } + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overridden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. 
+ self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. + self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = '\n' + if line_break in ['\r', '\n', '\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def emit(self, event): + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. 
+ + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if self.event.encoding and not hasattr(self.stream, 'encoding'): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" + % self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. 
+ + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator('...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = sorted(self.event.tags.keys()) + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator('---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator('...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" + % self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator('...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" + % self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. 
+ + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor('&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor('*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. 
+ + def expect_flow_sequence(self): + self.write_indicator('[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator(']', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. 
+ + def expect_flow_mapping(self): + self.write_indicator('{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator('}', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator('}', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. 
+ + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + self.write_indicator('-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. + + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.check_simple_key(): + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. 
+ + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and event.value == '') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < 128 and (isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. 
+ + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = '!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if not self.event.style and self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis = 
self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) + return '%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != '!' or handle[-1] != '!': + raise EmitterError("tag handle must start and end with '!': %r" % handle) + for ch in handle[1:-1]: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (ch, handle)) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == '!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return ''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == '!': + 
return tag + handle = None + suffix = tag + prefixes = sorted(self.tag_prefixes.keys()) + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == '!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.~*\'()[]' \ + or (ch == '!' and handle != '!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ch) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = ''.join(chunks) + if handle: + return '%s%s' % (handle, suffix_text) + else: + return '!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (ch, anchor)) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith('---') or scalar.startswith('...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. 
+ preceded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in '\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. + if ch in '#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in '?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in ',?[]{}': + flow_indicators = True + if ch == ':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '#' and preceded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in '\n\x85\u2028\u2029': + line_breaks = True + if not (ch == '\n' or '\x20' <= ch <= '\x7E'): + if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD' + or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. 
+ if ch == ' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in '\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = (index+1 >= len(scalar) or + scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. + if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. 
+ if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. + if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write('\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = ' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = ' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = '%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = '%%TAG %s %s' % (handle_text, prefix_text) + if 
self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. + + def write_single_quoted(self, text, split=True): + self.write_indicator('\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != ' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == '\'': + data = '\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + self.write_indicator('\'', False) + + ESCAPE_REPLACEMENTS = { + '\0': '0', + '\x07': 'a', + '\x08': 'b', + '\x09': 't', + '\x0A': 'n', + '\x0B': 'v', + '\x0C': 'f', + '\x0D': 'r', + '\x1B': 'e', + '\"': '\"', + '\\': '\\', + '\x85': 'N', + '\xA0': '_', + '\u2028': 'L', + '\u2029': 'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator('"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \ + or not ('\x20' <= ch <= '\x7E' + or 
(self.allow_unicode + and ('\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD'))): + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = '\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= '\xFF': + data = '\\x%02X' % ord(ch) + elif ch <= '\uFFFF': + data = '\\u%04X' % ord(ch) + else: + data = '\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == ' ': + data = '\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator('"', False) + + def determine_block_hints(self, text): + hints = '' + if text: + if text[0] in ' \n\x85\u2028\u2029': + hints += str(self.best_indent) + if text[-1] not in '\n\x85\u2028\u2029': + hints += '-' + elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029': + hints += '+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('>'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != ' ' \ + and text[start] == '\n': + self.write_line_break() + leading_space = 
(ch == ' ') + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + spaces = (ch == ' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('|'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in '\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + + def write_plain(self, text, split=True): + if self.root_context: + self.open_ended = True + if not text: + return + if not self.whitespace: + data = ' ' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.whitespace = False + self.indention = False + spaces = False + breaks = False + start = end = 0 + while end <= 
len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width and split: + self.write_indent() + self.whitespace = False + self.indention = False + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + self.whitespace = False + self.indention = False + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 diff --git a/ankihub/lib/yaml/error.py b/ankihub/lib/yaml/error.py new file mode 100644 index 000000000..b796b4dc5 --- /dev/null +++ b/ankihub/lib/yaml/error.py @@ -0,0 +1,75 @@ + +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] + +class Mark: + + def __init__(self, name, index, line, column, buffer, pointer): + self.name = name + self.index = index + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029': + start -= 1 + if self.pointer-start > max_length/2-1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029': + end += 1 + if end-self.pointer > max_length/2-1: + tail = ' ... 
' + end -= 5 + break + snippet = self.buffer[start:end] + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + +class YAMLError(Exception): + pass + +class MarkedYAMLError(YAMLError): + + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) + diff --git a/ankihub/lib/yaml/events.py b/ankihub/lib/yaml/events.py new file mode 100644 index 000000000..f79ad389c --- /dev/null +++ b/ankihub/lib/yaml/events.py @@ -0,0 +1,86 @@ + +# Abstract classes. 
+ +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None): + self.anchor = anchor + self.start_mark = start_mark + self.end_mark = end_mark + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class CollectionEndEvent(Event): + pass + +# Implementations. + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndEvent(Event): + pass + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + +class AliasEvent(NodeEvent): + pass + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class SequenceStartEvent(CollectionStartEvent): + pass 
+ +class SequenceEndEvent(CollectionEndEvent): + pass + +class MappingStartEvent(CollectionStartEvent): + pass + +class MappingEndEvent(CollectionEndEvent): + pass + diff --git a/ankihub/lib/yaml/loader.py b/ankihub/lib/yaml/loader.py new file mode 100644 index 000000000..e90c11224 --- /dev/null +++ b/ankihub/lib/yaml/loader.py @@ -0,0 +1,63 @@ + +__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader'] + +from .reader import * +from .scanner import * +from .parser import * +from .composer import * +from .constructor import * +from .resolver import * + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + FullConstructor.__init__(self) + Resolver.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + +# UnsafeLoader is the same as Loader (which is and was always unsafe on +# untrusted input). Use of either Loader or UnsafeLoader should be rare, since +# FullLoad should be able to load almost all YAML safely. Loader is left intact +# to ensure backwards compatibility. 
+class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) diff --git a/ankihub/lib/yaml/nodes.py b/ankihub/lib/yaml/nodes.py new file mode 100644 index 000000000..c4f070c41 --- /dev/null +++ b/ankihub/lib/yaml/nodes.py @@ -0,0 +1,49 @@ + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + value = self.value + #if isinstance(value, list): + # if len(value) == 0: + # value = '' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + +class ScalarNode(Node): + id = 'scalar' + def __init__(self, tag, value, + start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class CollectionNode(Node): + def __init__(self, tag, value, + start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class SequenceNode(CollectionNode): + id = 'sequence' + +class MappingNode(CollectionNode): + id = 'mapping' + diff --git a/ankihub/lib/yaml/parser.py b/ankihub/lib/yaml/parser.py new file mode 100644 index 000000000..13a5995d2 --- /dev/null +++ b/ankihub/lib/yaml/parser.py @@ -0,0 +1,589 @@ + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? 
explicit_document* STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
+# +# FIRST sets: +# +# stream: { STREAM-START } +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + +__all__ = ['Parser', 'ParserError'] + +from .error import MarkedYAMLError +from .tokens import * +from .events import * +from .scanner import * + +class ParserError(MarkedYAMLError): + pass + +class Parser: + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. 
+ + DEFAULT_TAGS = { + '!': '!', + '!!': 'tag:yaml.org,2002:', + } + + def __init__(self): + self.current_event = None + self.yaml_version = None + self.tag_handles = {} + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + + # Parse the stream start. + token = self.get_token() + event = StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) + + # Prepare the next state. + self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + + # Parse an implicit document. + if not self.check_token(DirectiveToken, DocumentStartToken, + StreamEndToken): + self.tag_handles = self.DEFAULT_TAGS + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=False) + + # Prepare the next state. 
+            self.states.append(self.parse_document_end)
+            self.state = self.parse_block_node
+
+            return event
+
+        else:
+            return self.parse_document_start()
+
+    def parse_document_start(self):
+
+        # Parse any extra document end indicators.
+        while self.check_token(DocumentEndToken):
+            self.get_token()
+
+        # Parse an explicit document.
+        if not self.check_token(StreamEndToken):
+            token = self.peek_token()
+            start_mark = token.start_mark
+            version, tags = self.process_directives()
+            if not self.check_token(DocumentStartToken):
+                raise ParserError(None, None,
+                        "expected '<document start>', but found %r"
+                        % self.peek_token().id,
+                        self.peek_token().start_mark)
+            token = self.get_token()
+            end_mark = token.end_mark
+            event = DocumentStartEvent(start_mark, end_mark,
+                    explicit=True, version=version, tags=tags)
+            self.states.append(self.parse_document_end)
+            self.state = self.parse_document_content
+        else:
+            # Parse the end of the stream.
+            token = self.get_token()
+            event = StreamEndEvent(token.start_mark, token.end_mark)
+            assert not self.states
+            assert not self.marks
+            self.state = None
+        return event
+
+    def parse_document_end(self):
+
+        # Parse the document end.
+        token = self.peek_token()
+        start_mark = end_mark = token.start_mark
+        explicit = False
+        if self.check_token(DocumentEndToken):
+            token = self.get_token()
+            end_mark = token.end_mark
+            explicit = True
+        event = DocumentEndEvent(start_mark, end_mark,
+                explicit=explicit)
+
+        # Prepare the next state.
+ self.state = self.parse_document_start + + return event + + def parse_document_content(self): + if self.check_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, StreamEndToken): + event = self.process_empty_scalar(self.peek_token().start_mark) + self.state = self.states.pop() + return event + else: + return self.parse_block_node() + + def process_directives(self): + self.yaml_version = None + self.tag_handles = {} + while self.check_token(DirectiveToken): + token = self.get_token() + if token.name == 'YAML': + if self.yaml_version is not None: + raise ParserError(None, None, + "found duplicate YAML directive", token.start_mark) + major, minor = token.value + if major != 1: + raise ParserError(None, None, + "found incompatible YAML document (version 1.* is required)", + token.start_mark) + self.yaml_version = token.value + elif token.name == 'TAG': + handle, prefix = token.value + if handle in self.tag_handles: + raise ParserError(None, None, + "duplicate tag handle %r" % handle, + token.start_mark) + self.tag_handles[handle] = prefix + if self.tag_handles: + value = self.yaml_version, self.tag_handles.copy() + else: + value = self.yaml_version, None + for key in self.DEFAULT_TAGS: + if key not in self.tag_handles: + self.tag_handles[key] = self.DEFAULT_TAGS[key] + return value + + # block_node_or_indentless_sequence ::= ALIAS + # | properties (block_content | indentless_block_sequence)? + # | block_content + # | indentless_block_sequence + # block_node ::= ALIAS + # | properties block_content? + # | block_content + # flow_node ::= ALIAS + # | properties flow_content? + # | flow_content + # properties ::= TAG ANCHOR? | ANCHOR TAG? 
+ # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError("while parsing a node", start_mark, + "found undefined tag handle %r" % handle, + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + #if tag == '!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == '!') + if indentless_sequence and 
self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == '!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_sequence_first_entry + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. 
+                event = ScalarEvent(anchor, tag, (implicit, False), '',
+                        start_mark, end_mark)
+                self.state = self.states.pop()
+            else:
+                if block:
+                    node = 'block'
+                else:
+                    node = 'flow'
+                token = self.peek_token()
+                raise ParserError("while parsing a %s node" % node, start_mark,
+                        "expected the node content, but found %r" % token.id,
+                        token.start_mark)
+        return event
+
+    # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
+
+    def parse_block_sequence_first_entry(self):
+        token = self.get_token()
+        self.marks.append(token.start_mark)
+        return self.parse_block_sequence_entry()
+
+    def parse_block_sequence_entry(self):
+        if self.check_token(BlockEntryToken):
+            token = self.get_token()
+            if not self.check_token(BlockEntryToken, BlockEndToken):
+                self.states.append(self.parse_block_sequence_entry)
+                return self.parse_block_node()
+            else:
+                self.state = self.parse_block_sequence_entry
+                return self.process_empty_scalar(token.end_mark)
+        if not self.check_token(BlockEndToken):
+            token = self.peek_token()
+            raise ParserError("while parsing a block collection", self.marks[-1],
+                    "expected <block end>, but found %r" % token.id, token.start_mark)
+        token = self.get_token()
+        event = SequenceEndEvent(token.start_mark, token.end_mark)
+        self.state = self.states.pop()
+        self.marks.pop()
+        return event
+
+    # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
+
+    def parse_indentless_sequence_entry(self):
+        if self.check_token(BlockEntryToken):
+            token = self.get_token()
+            if not self.check_token(BlockEntryToken,
+                    KeyToken, ValueToken, BlockEndToken):
+                self.states.append(self.parse_indentless_sequence_entry)
+                return self.parse_block_node()
+            else:
+                self.state = self.parse_indentless_sequence_entry
+                return self.process_empty_scalar(token.end_mark)
+        token = self.peek_token()
+        event = SequenceEndEvent(token.start_mark, token.start_mark)
+        self.state = self.states.pop()
+        return event
+
+    # block_mapping ::= BLOCK-MAPPING_START
+    #       ((KEY
block_node_or_indentless_sequence?)?
+    #       (VALUE block_node_or_indentless_sequence?)?)*
+    #       BLOCK-END
+
+    def parse_block_mapping_first_key(self):
+        token = self.get_token()
+        self.marks.append(token.start_mark)
+        return self.parse_block_mapping_key()
+
+    def parse_block_mapping_key(self):
+        if self.check_token(KeyToken):
+            token = self.get_token()
+            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
+                self.states.append(self.parse_block_mapping_value)
+                return self.parse_block_node_or_indentless_sequence()
+            else:
+                self.state = self.parse_block_mapping_value
+                return self.process_empty_scalar(token.end_mark)
+        if not self.check_token(BlockEndToken):
+            token = self.peek_token()
+            raise ParserError("while parsing a block mapping", self.marks[-1],
+                    "expected <block end>, but found %r" % token.id, token.start_mark)
+        token = self.get_token()
+        event = MappingEndEvent(token.start_mark, token.end_mark)
+        self.state = self.states.pop()
+        self.marks.pop()
+        return event
+
+    def parse_block_mapping_value(self):
+        if self.check_token(ValueToken):
+            token = self.get_token()
+            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
+                self.states.append(self.parse_block_mapping_key)
+                return self.parse_block_node_or_indentless_sequence()
+            else:
+                self.state = self.parse_block_mapping_key
+                return self.process_empty_scalar(token.end_mark)
+        else:
+            self.state = self.parse_block_mapping_key
+            token = self.peek_token()
+            return self.process_empty_scalar(token.start_mark)
+
+    # flow_sequence ::= FLOW-SEQUENCE-START
+    #                   (flow_sequence_entry FLOW-ENTRY)*
+    #                   flow_sequence_entry?
+    #                   FLOW-SEQUENCE-END
+    # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+    #
+    # Note that while production rules for both flow_sequence_entry and
+    # flow_mapping_entry are equal, their interpretations are different.
+    # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
+    # generate an inline mapping (set syntax).
+ + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(FlowSequenceEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow sequence", self.marks[-1], + "expected ',' or ']', but got %r" % token.id, token.start_mark) + + if self.check_token(KeyToken): + token = self.peek_token() + event = MappingStartEvent(None, None, True, + token.start_mark, token.end_mark, + flow_style=True) + self.state = self.parse_flow_sequence_entry_mapping_key + return event + elif not self.check_token(FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_flow_node() + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_key(self): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_value + return self.process_empty_scalar(token.end_mark) + + def parse_flow_sequence_entry_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_end) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_end + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_sequence_entry_mapping_end + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def 
parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return MappingEndEvent(token.start_mark, token.start_mark) + + # flow_mapping ::= FLOW-MAPPING-START + # (flow_mapping_entry FLOW-ENTRY)* + # flow_mapping_entry? + # FLOW-MAPPING-END + # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(FlowMappingEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow mapping", self.marks[-1], + "expected ',' or '}', but got %r" % token.id, token.start_mark) + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_value + return self.process_empty_scalar(token.end_mark) + elif not self.check_token(FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_empty_value) + return self.parse_flow_node() + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def 
parse_flow_mapping_empty_value(self): + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(self.peek_token().start_mark) + + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, (True, False), '', mark, mark) + diff --git a/ankihub/lib/yaml/reader.py b/ankihub/lib/yaml/reader.py new file mode 100644 index 000000000..774b0219b --- /dev/null +++ b/ankihub/lib/yaml/reader.py @@ -0,0 +1,185 @@ +# This module contains abstractions for the input stream. You don't have to +# looks further, there are no pretty code. +# +# We define two classes here. +# +# Mark(source, line, column) +# It's just a record and its only use is producing nice error messages. +# Parser does not use it for any other purposes. +# +# Reader(source, data) +# Reader determines the encoding of `data` and converts it to unicode. +# Reader provides the following methods and attributes: +# reader.peek(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position to `length` characters. +# reader.index - the number of the current character. +# reader.line, stream.column - the line and the column of the current character. 
+ +__all__ = ['Reader', 'ReaderError'] + +from .error import YAMLError, Mark + +import codecs, re + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, bytes): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to a unicode string, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `bytes` object, + # - a `str` object, + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. 
+
+    def __init__(self, stream):
+        self.name = None
+        self.stream = None
+        self.stream_pointer = 0
+        self.eof = True
+        self.buffer = ''
+        self.pointer = 0
+        self.raw_buffer = None
+        self.raw_decode = None
+        self.encoding = None
+        self.index = 0
+        self.line = 0
+        self.column = 0
+        if isinstance(stream, str):
+            self.name = "<unicode string>"
+            self.check_printable(stream)
+            self.buffer = stream+'\0'
+        elif isinstance(stream, bytes):
+            self.name = "<byte string>"
+            self.raw_buffer = stream
+            self.determine_encoding()
+        else:
+            self.stream = stream
+            self.name = getattr(stream, 'name', "<file>")
+            self.eof = False
+            self.raw_buffer = None
+            self.determine_encoding()
+
+    def peek(self, index=0):
+        try:
+            return self.buffer[self.pointer+index]
+        except IndexError:
+            self.update(index+1)
+            return self.buffer[self.pointer+index]
+
+    def prefix(self, length=1):
+        if self.pointer+length >= len(self.buffer):
+            self.update(length)
+        return self.buffer[self.pointer:self.pointer+length]
+
+    def forward(self, length=1):
+        if self.pointer+length+1 >= len(self.buffer):
+            self.update(length+1)
+        while length:
+            ch = self.buffer[self.pointer]
+            self.pointer += 1
+            self.index += 1
+            if ch in '\n\x85\u2028\u2029' \
+                    or (ch == '\r' and self.buffer[self.pointer] != '\n'):
+                self.line += 1
+                self.column = 0
+            elif ch != '\uFEFF':
+                self.column += 1
+            length -= 1
+
+    def get_mark(self):
+        if self.stream is None:
+            return Mark(self.name, self.index, self.line, self.column,
+                    self.buffer, self.pointer)
+        else:
+            return Mark(self.name, self.index, self.line, self.column,
+                    None, None)
+
+    def determine_encoding(self):
+        while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
+            self.update_raw()
+        if isinstance(self.raw_buffer, bytes):
+            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
+                self.raw_decode = codecs.utf_16_le_decode
+                self.encoding = 'utf-16-le'
+            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
+                self.raw_decode = codecs.utf_16_be_decode
+                self.encoding = 'utf-16-be'
+            else:
self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError as exc: + character = self.raw_buffer[exc.start] + if self.stream is not None: + position = self.stream_pointer-len(self.raw_buffer)+exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += '\0' + self.raw_buffer = None + break + + def update_raw(self, size=4096): + data = self.stream.read(size) + if self.raw_buffer is None: + self.raw_buffer = data + else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: + self.eof = True diff --git a/ankihub/lib/yaml/representer.py b/ankihub/lib/yaml/representer.py new file mode 100644 index 000000000..808ca06df --- /dev/null +++ b/ankihub/lib/yaml/representer.py @@ -0,0 +1,389 @@ + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError'] + +from .error import * +from .nodes import * + +import datetime, copyreg, types, base64, collections + +class RepresenterError(YAMLError): + pass + +class BaseRepresenter: + 
+ yaml_representers = {} + yaml_multi_representers = {} + + def __init__(self, default_style=None, default_flow_style=False, sort_keys=True): + self.default_style = default_style + self.sort_keys = sort_keys + self.default_flow_style = default_flow_style + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent(self, data): + node = self.represent_data(data) + self.serialize(node) + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent_data(self, data): + if self.ignore_aliases(data): + self.alias_key = None + else: + self.alias_key = id(data) + if self.alias_key is not None: + if self.alias_key in self.represented_objects: + node = self.represented_objects[self.alias_key] + #if node is None: + # raise RepresenterError("recursive objects are not allowed: %r" % data) + return node + #self.represented_objects[alias_key] = None + self.object_keeper.append(data) + data_types = type(data).__mro__ + if data_types[0] in self.yaml_representers: + node = self.yaml_representers[data_types[0]](self, data) + else: + for data_type in data_types: + if data_type in self.yaml_multi_representers: + node = self.yaml_multi_representers[data_type](self, data) + break + else: + if None in self.yaml_multi_representers: + node = self.yaml_multi_representers[None](self, data) + elif None in self.yaml_representers: + node = self.yaml_representers[None](self, data) + else: + node = ScalarNode(None, str(data)) + #if alias_key is not None: + # self.represented_objects[alias_key] = node + return node + + @classmethod + def add_representer(cls, data_type, representer): + if not 'yaml_representers' in cls.__dict__: + cls.yaml_representers = cls.yaml_representers.copy() + cls.yaml_representers[data_type] = representer + + @classmethod + def add_multi_representer(cls, data_type, representer): + if not 'yaml_multi_representers' in cls.__dict__: + cls.yaml_multi_representers = cls.yaml_multi_representers.copy() + 
cls.yaml_multi_representers[data_type] = representer + + def represent_scalar(self, tag, value, style=None): + if style is None: + style = self.default_style + node = ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item in sequence: + node_item = self.represent_data(item) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + node = MappingNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + if hasattr(mapping, 'items'): + mapping = list(mapping.items()) + if self.sort_keys: + try: + mapping = sorted(mapping) + except TypeError: + pass + for item_key, item_value in mapping: + node_key = self.represent_data(item_key) + node_value = self.represent_data(item_value) + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def ignore_aliases(self, data): + return False + +class SafeRepresenter(BaseRepresenter): + + def ignore_aliases(self, data): + if data is None: + return True + if isinstance(data, tuple) and data == (): + return 
True + if isinstance(data, (str, bytes, bool, int, float)): + return True + + def represent_none(self, data): + return self.represent_scalar('tag:yaml.org,2002:null', 'null') + + def represent_str(self, data): + return self.represent_scalar('tag:yaml.org,2002:str', data) + + def represent_binary(self, data): + if hasattr(base64, 'encodebytes'): + data = base64.encodebytes(data).decode('ascii') + else: + data = base64.encodestring(data).decode('ascii') + return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|') + + def represent_bool(self, data): + if data: + value = 'true' + else: + value = 'false' + return self.represent_scalar('tag:yaml.org,2002:bool', value) + + def represent_int(self, data): + return self.represent_scalar('tag:yaml.org,2002:int', str(data)) + + inf_value = 1e300 + while repr(inf_value) != repr(inf_value*inf_value): + inf_value *= inf_value + + def represent_float(self, data): + if data != data or (data == 0.0 and data == 1.0): + value = '.nan' + elif data == self.inf_value: + value = '.inf' + elif data == -self.inf_value: + value = '-.inf' + else: + value = repr(data).lower() + # Note that in some cases `repr(data)` represents a float number + # without the decimal parts. For instance: + # >>> repr(1e17) + # '1e17' + # Unfortunately, this is not a valid float representation according + # to the definition of the `!!float` tag. We fix this by adding + # '.0' before the 'e' symbol. + if '.' 
not in value and 'e' in value: + value = value.replace('e', '.0e', 1) + return self.represent_scalar('tag:yaml.org,2002:float', value) + + def represent_list(self, data): + #pairs = (len(data) > 0 and isinstance(data, list)) + #if pairs: + # for item in data: + # if not isinstance(item, tuple) or len(item) != 2: + # pairs = False + # break + #if not pairs: + return self.represent_sequence('tag:yaml.org,2002:seq', data) + #value = [] + #for item_key, item_value in data: + # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', + # [(item_key, item_value)])) + #return SequenceNode(u'tag:yaml.org,2002:pairs', value) + + def represent_dict(self, data): + return self.represent_mapping('tag:yaml.org,2002:map', data) + + def represent_set(self, data): + value = {} + for key in data: + value[key] = None + return self.represent_mapping('tag:yaml.org,2002:set', value) + + def represent_date(self, data): + value = data.isoformat() + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_datetime(self, data): + value = data.isoformat(' ') + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + return self.represent_mapping(tag, state, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object", data) + +SafeRepresenter.add_representer(type(None), + SafeRepresenter.represent_none) + +SafeRepresenter.add_representer(str, + SafeRepresenter.represent_str) + +SafeRepresenter.add_representer(bytes, + SafeRepresenter.represent_binary) + +SafeRepresenter.add_representer(bool, + SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, + SafeRepresenter.represent_int) + +SafeRepresenter.add_representer(float, + SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, + 
SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, + SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, + SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(datetime.date, + SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, + SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, + SafeRepresenter.represent_undefined) + +class Representer(SafeRepresenter): + + def represent_complex(self, data): + if data.imag == 0.0: + data = '%r' % data.real + elif data.real == 0.0: + data = '%rj' % data.imag + elif data.imag > 0: + data = '%r+%rj' % (data.real, data.imag) + else: + data = '%r%rj' % (data.real, data.imag) + return self.represent_scalar('tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data): + return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data): + name = '%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '') + + def represent_module(self, data): + return self.represent_scalar( + 'tag:yaml.org,2002:python/module:'+data.__name__, '') + + def represent_object(self, data): + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. 
+ + cls = type(data) + if cls in copyreg.dispatch_table: + reduce = copyreg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError("cannot represent an object", data) + reduce = (list(reduce)+[None]*5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = 'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = 'tag:yaml.org,2002:python/object/apply:' + newobj = False + function_name = '%s.%s' % (function.__module__, function.__name__) + if not args and not listitems and not dictitems \ + and isinstance(state, dict) and newobj: + return self.represent_mapping( + 'tag:yaml.org,2002:python/object:'+function_name, state) + if not listitems and not dictitems \ + and isinstance(state, dict) and not state: + return self.represent_sequence(tag+function_name, args) + value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag+function_name, value) + + def represent_ordered_dict(self, data): + # Provide uniform representation across different Python versions. 
+ data_type = type(data) + tag = 'tag:yaml.org,2002:python/object/apply:%s.%s' \ + % (data_type.__module__, data_type.__name__) + items = [[key, value] for key, value in data.items()] + return self.represent_sequence(tag, [items]) + +Representer.add_representer(complex, + Representer.represent_complex) + +Representer.add_representer(tuple, + Representer.represent_tuple) + +Representer.add_multi_representer(type, + Representer.represent_name) + +Representer.add_representer(collections.OrderedDict, + Representer.represent_ordered_dict) + +Representer.add_representer(types.FunctionType, + Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, + Representer.represent_name) + +Representer.add_representer(types.ModuleType, + Representer.represent_module) + +Representer.add_multi_representer(object, + Representer.represent_object) + diff --git a/ankihub/lib/yaml/resolver.py b/ankihub/lib/yaml/resolver.py new file mode 100644 index 000000000..3522bdaaf --- /dev/null +++ b/ankihub/lib/yaml/resolver.py @@ -0,0 +1,227 @@ + +__all__ = ['BaseResolver', 'Resolver'] + +from .error import * +from .nodes import * + +import re + +class ResolverError(YAMLError): + pass + +class BaseResolver: + + DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' + + yaml_implicit_resolvers = {} + yaml_path_resolvers = {} + + def __init__(self): + self.resolver_exact_paths = [] + self.resolver_prefix_paths = [] + + @classmethod + def add_implicit_resolver(cls, tag, regexp, first): + if not 'yaml_implicit_resolvers' in cls.__dict__: + implicit_resolvers = {} + for key in cls.yaml_implicit_resolvers: + implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:] + cls.yaml_implicit_resolvers = implicit_resolvers + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + + @classmethod + def add_path_resolver(cls, 
tag, path, kind=None): + # Note: `add_path_resolver` is experimental. The API could be changed. + # `new_path` is a pattern that is matched against the path from the + # root to the node that is being considered. `node_path` elements are + # tuples `(node_check, index_check)`. `node_check` is a node class: + # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` + # matches any kind of a node. `index_check` could be `None`, a boolean + # value, a string value, or a number. `None` and `False` match against + # any _value_ of sequence and mapping nodes. `True` matches against + # any _key_ of a mapping node. A string `index_check` matches against + # a mapping value that corresponds to a scalar key which content is + # equal to the `index_check` value. An integer `index_check` matches + # against a sequence value with the index equal to `index_check`. + if not 'yaml_path_resolvers' in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError("Invalid path element: %s" % element) + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ + and not isinstance(node_check, str) \ + and node_check is not None: + raise ResolverError("Invalid node checker: %s" % node_check) + if not isinstance(index_check, (str, int)) \ + and index_check is not None: + raise ResolverError("Invalid index checker: %s" % index_check) + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + elif kind is dict: + kind = MappingNode + elif kind not in 
[ScalarNode, SequenceNode, MappingNode] \ + and kind is not None: + raise ResolverError("Invalid node kind: %s" % kind) + cls.yaml_path_resolvers[tuple(new_path), kind] = tag + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, + current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, + current_node, current_index): + node_check, index_check = path[depth-1] + if isinstance(node_check, str): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if (index_check is False or index_check is None) \ + and current_index is None: + return + if isinstance(index_check, str): + if not (isinstance(current_index, ScalarNode) + and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit): + if kind is ScalarNode and implicit[0]: + if value == '': + resolvers = self.yaml_implicit_resolvers.get('', []) + else: + resolvers = 
self.yaml_implicit_resolvers.get(value[0], []) + wildcard_resolvers = self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers + wildcard_resolvers: + if regexp.match(value): + return tag + implicit = implicit[1] + if self.yaml_path_resolvers: + exact_paths = self.resolver_exact_paths[-1] + if kind in exact_paths: + return exact_paths[kind] + if None in exact_paths: + return exact_paths[None] + if kind is ScalarNode: + return self.DEFAULT_SCALAR_TAG + elif kind is SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is MappingNode: + return self.DEFAULT_MAPPING_TAG + +class Resolver(BaseResolver): + pass + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:bool', + re.compile(r'''^(?:yes|Yes|YES|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list('yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:float', + re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + |\.[0-9][0-9_]*(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* + |[-+]?\.(?:inf|Inf|INF) + |\.(?:nan|NaN|NAN))$''', re.X), + list('-+0123456789.')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:int', + re.compile(r'''^(?:[-+]?0b[0-1_]+ + |[-+]?0[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), + list('-+0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:merge', + re.compile(r'^(?:<<)$'), + ['<']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:null', + re.compile(r'''^(?: ~ + |null|Null|NULL + | )$''', re.X), + ['~', 'n', 'N', '']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:timestamp', + re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? 
+ (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list('0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:value', + re.compile(r'^(?:=)$'), + ['=']) + +# The following resolver is only for documentation purposes. It cannot work +# because plain scalars cannot start with '!', '&', or '*'. +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:yaml', + re.compile(r'^(?:!|&|\*)$'), + list('!&*')) + diff --git a/ankihub/lib/yaml/scanner.py b/ankihub/lib/yaml/scanner.py new file mode 100644 index 000000000..de925b07f --- /dev/null +++ b/ankihub/lib/yaml/scanner.py @@ -0,0 +1,1435 @@ + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DIRECTIVE(name, value) +# DOCUMENT-START +# DOCUMENT-END +# BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START +# BLOCK-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# BLOCK-ENTRY +# FLOW-ENTRY +# KEY +# VALUE +# ALIAS(value) +# ANCHOR(value) +# TAG(value) +# SCALAR(value, plain, style) +# +# Read comments in the Scanner code for more details. +# + +__all__ = ['Scanner', 'ScannerError'] + +from .error import MarkedYAMLError +from .tokens import * + +class ScannerError(MarkedYAMLError): + pass + +class SimpleKey: + # See below simple keys treatment. + + def __init__(self, token_number, required, index, line, column, mark): + self.token_number = token_number + self.required = required + self.index = index + self.line = line + self.column = column + self.mark = mark + +class Scanner: + + def __init__(self): + """Initialize the scanner.""" + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. 
+ # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. + + # Had we reached the end of the stream? + self.done = False + + # The number of unclosed '{' and '['. `flow_level == 0` means block + # context. + self.flow_level = 0 + + # List of processed tokens that are not yet emitted. + self.tokens = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # The current indentation level. + self.indent = -1 + + # Past indentation levels. + self.indents = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. + self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. 
+ + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + # Return None if no more tokens. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + else: + return None + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + self.scan_to_next_token() + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.column) + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == '\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == '%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == '-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == '.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. 
+ #if ch == '\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == '[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == '{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == ']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == '}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == ',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == '-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == '?' and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == ':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == '*': + return self.fetch_alias() + + # Is it an anchor? + if ch == '&': + return self.fetch_anchor() + + # Is it a tag? + if ch == '!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == '|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == '>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == '\'': + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == '\"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError("while scanning for the next token", None, + "found character %r that cannot start any token" % ch, + self.get_mark()) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. 
We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in list(self.possible_simple_keys): + key = self.possible_simple_keys[level] + if key.line != self.line \ + or self.index-key.index > 1024: + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not find expected ':'", self.get_mark()) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.column + + # The next token might be a simple key. Let's save it's number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken+len(self.tokens) + key = SimpleKey(token_number, required, + self.index, self.line, self.column, self.get_mark()) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self): + # Remove the saved possible key position at the current flow level. 
+ if self.flow_level in self.possible_simple_keys: + key = self.possible_simple_keys[self.flow_level] + + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not find expected ':'", self.get_mark()) + + del self.possible_simple_keys[self.flow_level] + + # Indentation functions. + + def unwind_indent(self, column): + + ## In flow context, tokens should respect indentation. + ## Actually the condition should be `self.indent >= column` according to + ## the spec. But this condition will prohibit intuitively correct + ## constructions such as + ## key : { + ## } + #if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid indentation or unclosed '[' or '{'", + # self.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive then specification requires. + if self.flow_level: + return + + # In block context, we may need to issue the BLOCK-END tokens. + while self.indent > column: + mark = self.get_mark() + self.indent = self.indents.pop() + self.tokens.append(BlockEndToken(mark, mark)) + + def add_indent(self, column): + # Check if we need to increase indentation. + if self.indent < column: + self.indents.append(self.indent) + self.indent = column + return True + return False + + # Fetchers. + + def fetch_stream_start(self): + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + + # Read the token. + mark = self.get_mark() + + # Add STREAM-START. + self.tokens.append(StreamStartToken(mark, mark, + encoding=self.encoding)) + + + def fetch_stream_end(self): + + # Set the current indentation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + + # Read the token. + mark = self.get_mark() + + # Add STREAM-END. + self.tokens.append(StreamEndToken(mark, mark)) + + # The steam is finished. 
+ self.done = True + + def fetch_directive(self): + + # Set the current indentation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Scan and add DIRECTIVE. + self.tokens.append(self.scan_directive()) + + def fetch_document_start(self): + self.fetch_document_indicator(DocumentStartToken) + + def fetch_document_end(self): + self.fetch_document_indicator(DocumentEndToken) + + def fetch_document_indicator(self, TokenClass): + + # Set the current indentation to -1. + self.unwind_indent(-1) + + # Reset simple keys. Note that there could not be a block collection + # after '---'. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Add DOCUMENT-START or DOCUMENT-END. + start_mark = self.get_mark() + self.forward(3) + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_start(self): + self.fetch_flow_collection_start(FlowSequenceStartToken) + + def fetch_flow_mapping_start(self): + self.fetch_flow_collection_start(FlowMappingStartToken) + + def fetch_flow_collection_start(self, TokenClass): + + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. 
+ self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self): + + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError(None, None, + "sequence entries are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + + # Simple keys are allowed after '-'. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not necessary a simple)? + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping keys are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-MAPPING-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. 
+ self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self): + + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number-self.tokens_taken, + KeyToken(key.mark, key.mark)) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. + if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert(key.token_number-self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be caught by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping values are not allowed here", + self.get_mark()) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. 
+ start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self): + + # ALIAS could be a simple key. + self.save_possible_simple_key() + + # No simple keys after ALIAS. + self.allow_simple_key = False + + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self): + + # ANCHOR could start a simple key. + self.save_possible_simple_key() + + # No simple keys after ANCHOR. + self.allow_simple_key = False + + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self): + + # TAG could start a simple key. + self.save_possible_simple_key() + + # No simple keys after TAG. + self.allow_simple_key = False + + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self): + self.fetch_block_scalar(style='|') + + def fetch_folded(self): + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style): + + # A simple key may follow a block scalar. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self): + self.fetch_flow_scalar(style='\'') + + def fetch_double(self): + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style): + + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self): + + # A plain scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + + # Scan and add SCALAR. 
May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self): + + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. + if self.column == 0: + return True + + def check_document_start(self): + + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '---' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_document_end(self): + + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '...' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_block_entry(self): + + # BLOCK-ENTRY: '-' (' '|'\n') + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_key(self): + + # KEY(flow context): '?' + if self.flow_level: + return True + + # KEY(block context): '?' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_value(self): + + # VALUE(flow context): ':' + if self.flow_level: + return True + + # VALUE(block context): ':' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_plain(self): + + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + ch = self.peek() + return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' + and (ch == '-' or (not self.flow_level and ch in '?:'))) + + # Scanners. + + def scan_to_next_token(self): + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. 
+ # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if : + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + + if self.index == 0 and self.peek() == '\uFEFF': + self.forward() + found = False + while not found: + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + + def scan_directive(self): + # See the specification for details. + start_mark = self.get_mark() + self.forward() + name = self.scan_directive_name(start_mark) + value = None + if name == 'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.get_mark() + elif name == 'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.get_mark() + else: + end_mark = self.get_mark() + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark): + # See the specification for details. 
+ length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + return value + + def scan_yaml_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + major = self.scan_yaml_directive_number(start_mark) + if self.peek() != '.': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or '.', but found %r" % self.peek(), + self.get_mark()) + self.forward() + minor = self.scan_yaml_directive_number(start_mark) + if self.peek() not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or ' ', but found %r" % self.peek(), + self.get_mark()) + return (major, minor) + + def scan_yaml_directive_number(self, start_mark): + # See the specification for details. + ch = self.peek() + if not ('0' <= ch <= '9'): + raise ScannerError("while scanning a directive", start_mark, + "expected a digit, but found %r" % ch, self.get_mark()) + length = 0 + while '0' <= self.peek(length) <= '9': + length += 1 + value = int(self.prefix(length)) + self.forward(length) + return value + + def scan_tag_directive_value(self, start_mark): + # See the specification for details. 
+ while self.peek() == ' ': + self.forward() + handle = self.scan_tag_directive_handle(start_mark) + while self.peek() == ' ': + self.forward() + prefix = self.scan_tag_directive_prefix(start_mark) + return (handle, prefix) + + def scan_tag_directive_handle(self, start_mark): + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.peek() + if ch != ' ': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_tag_directive_prefix(self, start_mark): + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % ch, self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpreted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. 
+ start_mark = self.get_mark() + indicator = self.peek() + if indicator == '*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == '<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != '>': + raise ScannerError("while parsing a tag", start_mark, + "expected '>', but found %r" % self.peek(), + self.get_mark()) + self.forward() + elif ch in '\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = '!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in '\0 \r\n\x85\u2028\u2029': + if ch == '!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = '!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = '!' + self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. 
+ + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = '' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != '\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in ' \t' + length = 0 + while self.peek(length) not in '\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != '\0': + + # Unfortunately, folding rules are ambiguous. + # + # This is the folding according to the specification: + + if folded and line_break == '\n' \ + and leading_non_space and self.peek() not in ' \t': + if not breaks: + chunks.append(' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + #if folded and line_break == '\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(' ') + # else: + # chunks.append(line_break) + #else: + # chunks.append(line_break) + else: + break + + # Chomp the tail. + if chomping is not False: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + + # We are done. 
+ return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + elif ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % ch, self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" % ch, + self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. 
+ chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() != ' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. + chunks = [] + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + while self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + return chunks, end_mark + + def scan_flow_scalar(self, style): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. 
+ if style == '"': + double = True + else: + double = False + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.forward() + end_mark = self.get_mark() + return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + ESCAPE_REPLACEMENTS = { + '0': '\0', + 'a': '\x07', + 'b': '\x08', + 't': '\x09', + '\t': '\x09', + 'n': '\x0A', + 'v': '\x0B', + 'f': '\x0C', + 'r': '\x0D', + 'e': '\x1B', + ' ': '\x20', + '\"': '\"', + '\\': '\\', + '/': '/', + 'N': '\x85', + '_': '\xA0', + 'L': '\u2028', + 'P': '\u2029', + } + + ESCAPE_CODES = { + 'x': 2, + 'u': 4, + 'U': 8, + } + + def scan_flow_scalar_non_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == '\'' and self.peek(1) == '\'': + chunks.append('\'') + self.forward(2) + elif (double and ch == '\'') or (not double and ch in '\"\\'): + chunks.append(ch) + self.forward() + elif double and ch == '\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "expected escape sequence of %d hexadecimal numbers, but found %r" % + (length, self.peek(k)), self.get_mark()) + code = int(self.prefix(length), 16) + chunks.append(chr(code)) + self.forward(length) + elif ch in 
'\r\n\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "found unknown escape character %r" % ch, self.get_mark()) + else: + return chunks + + def scan_flow_scalar_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + length = 0 + while self.peek(length) in ' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == '\0': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected document separator", self.get_mark()) + while self.peek() in ' \t': + self.forward() + if self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',' or '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. 
+ chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == '#': + break + while True: + ch = self.peek(length) + if ch in '\0 \t\r\n\x85\u2028\u2029' \ + or (ch == ':' and + self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029' + + (u',[]{}' if self.flow_level else u''))\ + or (self.flow_level and ch in ',?[]{}'): + break + length += 1 + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == '#' \ + or (not self.flow_level and self.column < indent): + break + return ScalarToken(''.join(chunks), True, start_mark, end_mark) + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! 
+ chunks = [] + length = 0 + while self.peek(length) in ' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != '!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != ' ': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if ch != '!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. 
+ chunks = [] + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.!~*\'()[]%': + if ch == '%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % ch, self.get_mark()) + return ''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. + codes = [] + mark = self.get_mark() + while self.peek() == '%': + self.forward() + for k in range(2): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexadecimal numbers, but found %r" + % self.peek(k), self.get_mark()) + codes.append(int(self.prefix(2), 16)) + self.forward(2) + try: + value = bytes(codes).decode('utf-8') + except UnicodeDecodeError as exc: + raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in '\r\n\x85': + if self.prefix(2) == '\r\n': + self.forward(2) + else: + self.forward() + return '\n' + elif ch in '\u2028\u2029': + self.forward() + return ch + return '' diff --git a/ankihub/lib/yaml/serializer.py b/ankihub/lib/yaml/serializer.py new file mode 100644 index 000000000..fe911e67a --- /dev/null +++ b/ankihub/lib/yaml/serializer.py @@ -0,0 +1,111 @@ + +__all__ = ['Serializer', 'SerializerError'] + +from .error import YAMLError +from .events import * +from .nodes import * + +class SerializerError(YAMLError): + pass + +class 
Serializer: + + ANCHOR_TEMPLATE = 'id%03d' + + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + self.closed = None + + def open(self): + if self.closed is None: + self.emit(StreamStartEvent(encoding=self.use_encoding)) + self.closed = False + elif self.closed: + raise SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + #def __del__(self): + # self.close() + + def serialize(self, node): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + self.anchors[node] = None + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + self.last_anchor_id += 1 + return self.ANCHOR_TEMPLATE % self.last_anchor_id + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + 
self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, (True, False)) + default_tag = self.resolve(ScalarNode, node.value, (False, True)) + implicit = (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent()) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent()) + self.ascend_resolver() + diff --git a/ankihub/lib/yaml/tokens.py b/ankihub/lib/yaml/tokens.py new file mode 100644 index 000000000..4d0b48a39 --- /dev/null +++ b/ankihub/lib/yaml/tokens.py @@ -0,0 +1,104 @@ + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +#class BOMToken(Token): +# id = '' + +class DirectiveToken(Token): + id = '' + def __init__(self, name, value, start_mark, end_mark): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class DocumentStartToken(Token): + id = '' + +class 
DocumentEndToken(Token): + id = '' + +class StreamStartToken(Token): + id = '' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndToken(Token): + id = '' + +class BlockSequenceStartToken(Token): + id = '' + +class BlockMappingStartToken(Token): + id = '' + +class BlockEndToken(Token): + id = '' + +class FlowSequenceStartToken(Token): + id = '[' + +class FlowMappingStartToken(Token): + id = '{' + +class FlowSequenceEndToken(Token): + id = ']' + +class FlowMappingEndToken(Token): + id = '}' + +class KeyToken(Token): + id = '?' + +class ValueToken(Token): + id = ':' + +class BlockEntryToken(Token): + id = '-' + +class FlowEntryToken(Token): + id = ',' + +class AliasToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class AnchorToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class TagToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class ScalarToken(Token): + id = '' + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + From 6899aa6313401b887d98b0aad774731153d8b3d3 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Thu, 13 Feb 2025 17:57:27 +0000 Subject: [PATCH 25/53] enable labs. 
--- ankihub/settings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ankihub/settings.py b/ankihub/settings.py index b1d0a1852..aa3526db7 100644 --- a/ankihub/settings.py +++ b/ankihub/settings.py @@ -193,7 +193,9 @@ def __init__(self): self.app_url: Optional[str] = None self.s3_bucket_url: Optional[str] = None self.anking_deck_id: Optional[uuid.UUID] = None - self.labs_enabled: bool = True if os.getenv("LABS_ENABLED") == "True" else False + # self.labs_enabled: bool = True if os.getenv("LABS_ENABLED") == "True" else False + # TODO Read it in from the config + self.labs_enabled: bool = True def setup_public_config_and_other_settings(self): migrate_public_config() From 55e3d990f8f5d9f4168598361291e5d4f10a7e77 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Thu, 13 Feb 2025 21:41:17 +0000 Subject: [PATCH 26/53] Create add-on first. --- .github/workflows/ci.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 26b51bea6..9c92595f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -133,6 +133,18 @@ jobs: - name: Download all artifacts uses: actions/download-artifact@v4 + - name: Create ankiaddon file + run: | + bash ./scripts/release.sh + env: + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + - name: Archive .ankiaddon + uses: actions/upload-artifact@v4 + with: + name: ankihub_addon_${{ github.sha }} + path: ankihub.ankiaddon + - name: Run coverage run: | coverage combine coverage_*/.coverage @@ -159,18 +171,6 @@ jobs: with: fail_ci_if_error: false - - name: Create ankiaddon file - run: | - bash ./scripts/release.sh - env: - GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - - - name: Archive .ankiaddon - uses: actions/upload-artifact@v4 - with: - name: ankihub_addon_${{ github.sha }} - path: ankihub.ankiaddon - - name: Run pre-commit run: pre-commit run --all From e3498b929f8cebf8b97bbe6c2113e28b110b21b9 Mon Sep 17 
00:00:00 2001 From: Andrew Sanchez Date: Fri, 14 Feb 2025 10:44:28 +0000 Subject: [PATCH 27/53] Save template. --- ankihub/labs/llm/llm.py | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 44693918d..6065f9dcc 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -236,11 +236,14 @@ def _setup_ui(self) -> None: self.setLayout(layout) def _get_yaml_content(self) -> str: - """Generate valid YAML content from the current field values.""" + """Generate valid YAML content from the current field values and save template.""" data = { key: editor.toPlainText() for key, editor in self.section_editors.items() } - return yaml.safe_dump(data, default_flow_style=False, sort_keys=False) + content = yaml.safe_dump(data, default_flow_style=False, sort_keys=False) + # Save the template + TemplateManager.save_template(self.template_name, content) + return content def _on_save(self) -> None: """Save the modified template.""" @@ -508,11 +511,9 @@ def _execute_prompt_template( try: # Run the LLM command with the template and note content - # Use shlex.quote to properly escape the note content for shell command import shlex escaped_content = shlex.quote(note_content) - # TODO Exclude ankihub_id field note_schema = json.dumps([{field: "string" for field in editor.note.keys()}]) cmd = [ @@ -520,33 +521,20 @@ def _execute_prompt_template( "run", "--no-project", "llm", - # TODO Allow users to choose model - # TODO Allow users to continue a conversation - # TODO Allow users to add an attachment "-m", "gpt-4o", "--no-stream", + "-t", + template_name, + "-p", + "note_schema", + shlex.quote(note_schema), + escaped_content, + "-o", + "json_object", + "1", ] - if template_content: - # If we have modified template content, pass it via stdin - cmd.extend(["-s", template_content]) - else: - # Otherwise use the template file - cmd.extend(["-t", template_name]) - - 
cmd.extend( - [ - "-p", - "note_schema", - shlex.quote(note_schema), - escaped_content, - "-o", - "json_object", - "1", - ] - ) - result = subprocess.run( cmd, capture_output=True, From ef94d369daa817749119aa23ccd204400b995b4a Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Fri, 14 Feb 2025 10:59:28 +0000 Subject: [PATCH 28/53] always make addon file right away. --- .github/workflows/ci.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9c92595f1..52b815c84 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,6 +38,18 @@ jobs: google_api_key: ${{ secrets.GOOGLE_API_KEY }} install_qt: true + - name: Create ankiaddon file + run: | + bash ./scripts/release.sh + env: + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + - name: Archive .ankiaddon + uses: actions/upload-artifact@v4 + with: + name: ankihub_addon_${{ github.sha }} + path: ankihub.ankiaddon + - name: Replace Anki version if: ${{ matrix.replace_anki_version }} run: pip install "${{ matrix.anki_package_version }}" @@ -133,18 +145,6 @@ jobs: - name: Download all artifacts uses: actions/download-artifact@v4 - - name: Create ankiaddon file - run: | - bash ./scripts/release.sh - env: - GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - - - name: Archive .ankiaddon - uses: actions/upload-artifact@v4 - with: - name: ankihub_addon_${{ github.sha }} - path: ankihub.ankiaddon - - name: Run coverage run: | coverage combine coverage_*/.coverage From afcd2b6499aff797d03be9b41db280e740bd5e1d Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Fri, 14 Feb 2025 12:40:33 +0100 Subject: [PATCH 29/53] Accept result dialog before showing other dialog --- ankihub/labs/llm/llm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 6065f9dcc..e735f532f 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -476,6 +476,9 
@@ def _show_llm_response(editor: Editor, response: str) -> None: def _handle_update_note(editor: Editor, response: str, dialog: QDialog) -> None: """Handle the update note button click.""" + + dialog.accept() + try: # Parse the JSON response new_fields = json.loads(response) @@ -486,7 +489,6 @@ def _handle_update_note(editor: Editor, response: str, dialog: QDialog) -> None: # Update the note _update_note_fields(editor, new_fields) tooltip("Note updated successfully") - dialog.accept() except json.JSONDecodeError: showWarning("Invalid JSON response from LLM") From 80eba5869ffc148f668508089a0ba0dbb3579391 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Fri, 14 Feb 2025 12:48:04 +0100 Subject: [PATCH 30/53] Try fix UI issue --- ankihub/labs/llm/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index e735f532f..ec8ae5936 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -257,8 +257,8 @@ def _on_save(self) -> None: def _on_execute(self) -> None: """Handle the execute button click.""" modified_content = self._get_yaml_content() - _execute_prompt_template(self.editor, self.template_name, modified_content) self.accept() + _execute_prompt_template(self.editor, self.template_name, modified_content) def _check_and_install_uv() -> None: From fbf2a90beb50f31dd270fc5be6fddb5acbfac7ce Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Fri, 14 Feb 2025 11:48:51 +0000 Subject: [PATCH 31/53] Use shell scripts and set PATH. 
--- .gitignore | 5 ++--- ankihub/labs/llm/llm.py | 29 ++++------------------------- scripts/install_llm.sh | 16 ++++++++++++++++ scripts/install_uv.sh | 10 ++++++++++ scripts/release.sh | 2 +- 5 files changed, 33 insertions(+), 29 deletions(-) create mode 100755 scripts/install_llm.sh create mode 100755 scripts/install_uv.sh diff --git a/.gitignore b/.gitignore index 8fe68191e..e08816356 100644 --- a/.gitignore +++ b/.gitignore @@ -2,10 +2,9 @@ ankihub/meta.json # Anki base directory -anki_base - +anki_base/ # AnkiHub base directory -ankihub_base +ankihub_base/ # Ignore everything except the README. /ankihub/user_files/* diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index ec8ae5936..c7391c9cb 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -275,11 +275,8 @@ def _check_and_install_uv() -> None: check=True, ) else: # macOS and Linux - subprocess.run( - "curl -LsSf https://astral.sh/uv/install.sh | sh", - shell=True, - check=True, - ) + script_path = Path(__file__).parents[4] / "scripts" / "install_uv.sh" + subprocess.run([str(script_path)], check=True) tooltip("Successfully installed uv") except subprocess.CalledProcessError as e: showWarning(f"Failed to install uv: {str(e)}") @@ -293,27 +290,9 @@ def _install_llm() -> None: print("llm is already installed") except (subprocess.CalledProcessError, FileNotFoundError): try: - # Install base llm package - subprocess.run( - ["uv", "tool", "install", "llm"], - check=True, - capture_output=True, - ) + script_path = Path(__file__).parents[4] / "scripts" / "install_llm.sh" + subprocess.run([str(script_path)], check=True) tooltip("Successfully installed llm") - - # Install additional providers - providers = ["llm-gemini", "llm-perplexity", "llm-claude-3"] - for provider in providers: - try: - subprocess.run( - ["uv", "run", "--no-project", "llm", "install", "-U", provider], - check=True, - capture_output=True, - ) - print(f"Successfully installed {provider}") - except 
subprocess.CalledProcessError as e: - showWarning(f"Failed to install {provider}: {str(e)}") - except subprocess.CalledProcessError as e: showWarning(f"Failed to install llm: {str(e)}") diff --git a/scripts/install_llm.sh b/scripts/install_llm.sh new file mode 100755 index 000000000..a2c3c3401 --- /dev/null +++ b/scripts/install_llm.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +# Ensure ~/.local/bin is in PATH +export PATH="$HOME/.local/bin:$PATH" + +# Install base llm package if not already installed +if ! command -v llm &> /dev/null; then + uv tool install llm +fi + +# Install additional providers +providers=("llm-gemini" "llm-perplexity" "llm-claude-3") +for provider in "${providers[@]}"; do + uv run --no-project llm install -U "$provider" +done diff --git a/scripts/install_uv.sh b/scripts/install_uv.sh new file mode 100755 index 000000000..4cbb45eb8 --- /dev/null +++ b/scripts/install_uv.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -e + +# Ensure ~/.local/bin is in PATH +export PATH="$HOME/.local/bin:$PATH" + +# Check if uv is already installed +if ! command -v uv &> /dev/null; then + curl -LsSf https://astral.sh/uv/install.sh | sh +fi diff --git a/scripts/release.sh b/scripts/release.sh index 7a4d2eae3..e2b47d6c1 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -1,6 +1,6 @@ set -e -python3 scripts/build.py +python scripts/build.py mkdir -p dist rm -rf dist/release From df50cc1e081ec5b6c9d649829449c98f19549663 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Fri, 14 Feb 2025 11:56:07 +0000 Subject: [PATCH 32/53] Move scripts. 
--- {scripts => ankihub/labs/llm}/install_llm.sh | 0 {scripts => ankihub/labs/llm}/install_uv.sh | 0 ankihub/labs/llm/llm.py | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename {scripts => ankihub/labs/llm}/install_llm.sh (100%) rename {scripts => ankihub/labs/llm}/install_uv.sh (100%) diff --git a/scripts/install_llm.sh b/ankihub/labs/llm/install_llm.sh similarity index 100% rename from scripts/install_llm.sh rename to ankihub/labs/llm/install_llm.sh diff --git a/scripts/install_uv.sh b/ankihub/labs/llm/install_uv.sh similarity index 100% rename from scripts/install_uv.sh rename to ankihub/labs/llm/install_uv.sh diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index c7391c9cb..bb97325ee 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -275,7 +275,7 @@ def _check_and_install_uv() -> None: check=True, ) else: # macOS and Linux - script_path = Path(__file__).parents[4] / "scripts" / "install_uv.sh" + script_path = Path(__file__).parent / "install_uv.sh" subprocess.run([str(script_path)], check=True) tooltip("Successfully installed uv") except subprocess.CalledProcessError as e: @@ -290,7 +290,7 @@ def _install_llm() -> None: print("llm is already installed") except (subprocess.CalledProcessError, FileNotFoundError): try: - script_path = Path(__file__).parents[4] / "scripts" / "install_llm.sh" + script_path = Path(__file__).parent / "install_llm.sh" subprocess.run([str(script_path)], check=True) tooltip("Successfully installed llm") except subprocess.CalledProcessError as e: From 33585d6da4806c321af31183d1b6db60d50a8997 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Fri, 14 Feb 2025 13:46:47 +0100 Subject: [PATCH 33/53] Make _install_llm work on Windows --- ankihub/labs/llm/llm.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index bb97325ee..6cbecc35f 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -290,8 
+290,17 @@ def _install_llm() -> None: print("llm is already installed") except (subprocess.CalledProcessError, FileNotFoundError): try: - script_path = Path(__file__).parent / "install_llm.sh" - subprocess.run([str(script_path)], check=True) + if platform.system() == "Windows": + subprocess.run(["uv", "tool", "install", "llm"], check=True) + providers = ["llm-gemini", "llm-perplexity", "llm-claude-3"] + for provider in providers: + subprocess.run( + ["uv", "run", "--no-project", "llm", "install", "-U", provider], + check=True, + ) + else: + script_path = Path(__file__).parent / "install_llm.sh" + subprocess.run([str(script_path)], check=True) tooltip("Successfully installed llm") except subprocess.CalledProcessError as e: showWarning(f"Failed to install llm: {str(e)}") From f1b1e930933130a3353995d6df5b3fa079078f52 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Fri, 14 Feb 2025 13:52:29 +0100 Subject: [PATCH 34/53] Fix focus issue --- ankihub/labs/llm/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 6cbecc35f..16a02e8fa 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -412,7 +412,7 @@ def _show_llm_response(editor: Editor, response: str) -> None: showWarning("Invalid JSON response from LLM") return - dialog = QDialog(active_window_or_mw()) + dialog = QDialog(editor.parentWindow) dialog.setWindowTitle("LLM Response - Field Changes") dialog.setMinimumWidth(800) dialog.setMinimumHeight(600) From 98e2ba9ae78cf44ef1a44656e2ec242c9bd83996 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Fri, 14 Feb 2025 13:59:07 +0100 Subject: [PATCH 35/53] Move labs depedenencies installation to background --- ankihub/labs/llm/llm.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 16a02e8fa..d59f7119a 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -24,6 +24,7 @@ from 
aqt.utils import showWarning, tooltip from jinja2 import Template +from ...gui.operations import AddonQueryOp from ...gui.utils import active_window_or_mw PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" @@ -308,8 +309,17 @@ def _install_llm() -> None: def setup() -> None: """Set up the LLM prompt functionality.""" - _check_and_install_uv() - _install_llm() + + def _install_labs_dependencies(): + _check_and_install_uv() + _install_llm() + + AddonQueryOp( + parent=active_window_or_mw(), + op=lambda _: _install_labs_dependencies(), + success=lambda _: None, + ).with_progress("Setting up AnkiHub Labs").run_in_background() + TemplateManager.initialize() # Initialize templates path gui_hooks.editor_did_init_buttons.append(_setup_prompt_selector_button) gui_hooks.webview_did_receive_js_message.append(_handle_js_message) From 3c9f64e8def224c5a8035ba459878c0188897d8b Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Fri, 14 Feb 2025 17:06:27 +0100 Subject: [PATCH 36/53] Use without_collection on query op for installing labs dependencies --- ankihub/labs/llm/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index d59f7119a..dbbec1c18 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -318,7 +318,7 @@ def _install_labs_dependencies(): parent=active_window_or_mw(), op=lambda _: _install_labs_dependencies(), success=lambda _: None, - ).with_progress("Setting up AnkiHub Labs").run_in_background() + ).without_collection().with_progress("Setting up AnkiHub Labs").run_in_background() TemplateManager.initialize() # Initialize templates path gui_hooks.editor_did_init_buttons.append(_setup_prompt_selector_button) From 8f84e8b1d278ff764474cef1870530562dfa865d Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sat, 15 Feb 2025 12:25:48 +0000 Subject: [PATCH 37/53] Use shell scripts. 
--- ankihub/entry_point.py | 7 +++- ankihub/labs/llm/check_llm.sh | 7 ++++ ankihub/labs/llm/check_uv.sh | 7 ++++ ankihub/labs/llm/execute_llm.sh | 18 ++++++++ ankihub/labs/llm/execute_prompt.sh | 12 ++++++ ankihub/labs/llm/get_templates_path.sh | 7 ++++ ankihub/labs/llm/llm.py | 57 ++++++++++---------------- 7 files changed, 78 insertions(+), 37 deletions(-) create mode 100755 ankihub/labs/llm/check_llm.sh create mode 100755 ankihub/labs/llm/check_uv.sh create mode 100755 ankihub/labs/llm/execute_llm.sh create mode 100755 ankihub/labs/llm/execute_prompt.sh create mode 100755 ankihub/labs/llm/get_templates_path.sh diff --git a/ankihub/entry_point.py b/ankihub/entry_point.py index a82596b77..5a91f2bdc 100644 --- a/ankihub/entry_point.py +++ b/ankihub/entry_point.py @@ -241,8 +241,11 @@ def _general_setup(): if config.labs_enabled: from .labs.llm import llm - llm.setup() - LOGGER.info("Set up LLM prompt functionality.") + try: + llm.setup() + LOGGER.info("Set up LLM prompt functionality.") + except Exception as e: + LOGGER.exception("Failed to set up LLM prompt functionality", error=e) def _copy_web_media_to_media_folder(): diff --git a/ankihub/labs/llm/check_llm.sh b/ankihub/labs/llm/check_llm.sh new file mode 100755 index 000000000..574080bcc --- /dev/null +++ b/ankihub/labs/llm/check_llm.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -e + +# Ensure ~/.local/bin is in PATH +export PATH="$HOME/.local/bin:$PATH" + +llm --version diff --git a/ankihub/labs/llm/check_uv.sh b/ankihub/labs/llm/check_uv.sh new file mode 100755 index 000000000..cc995d9d4 --- /dev/null +++ b/ankihub/labs/llm/check_uv.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -e + +# Ensure ~/.local/bin is in PATH +export PATH="$HOME/.local/bin:$PATH" + +uv version diff --git a/ankihub/labs/llm/execute_llm.sh b/ankihub/labs/llm/execute_llm.sh new file mode 100755 index 000000000..69aa7965d --- /dev/null +++ b/ankihub/labs/llm/execute_llm.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -e + +# Ensure ~/.local/bin is in PATH +export 
PATH="$HOME/.local/bin:$PATH" + +# Arguments: +# $1: template_name +# $2: note_schema +# $3: note_content + +uv run --no-project llm \ + -m gpt-4o \ + --no-stream \ + -t "$1" \ + -p note_schema "$2" \ + "$3" \ + -o json_object 1 diff --git a/ankihub/labs/llm/execute_prompt.sh b/ankihub/labs/llm/execute_prompt.sh new file mode 100755 index 000000000..79734ad19 --- /dev/null +++ b/ankihub/labs/llm/execute_prompt.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e + +# Ensure ~/.local/bin is in PATH +export PATH="$HOME/.local/bin:$PATH" + +# Arguments: +# $1: template_name +# $2: note_schema +# $3: note_content + +uv run --no-project llm -m gpt-4o --no-stream -t "$1" -p note_schema "$2" "$3" -o json_object 1 diff --git a/ankihub/labs/llm/get_templates_path.sh b/ankihub/labs/llm/get_templates_path.sh new file mode 100755 index 000000000..fef12f933 --- /dev/null +++ b/ankihub/labs/llm/get_templates_path.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -e + +# Ensure ~/.local/bin is in PATH +export PATH="$HOME/.local/bin:$PATH" + +uv run --no-project llm templates path diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index dbbec1c18..aa5ee8ef0 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -4,6 +4,7 @@ import json import platform import subprocess +import sys from pathlib import Path from typing import Any, Dict, List @@ -24,11 +25,15 @@ from aqt.utils import showWarning, tooltip from jinja2 import Template +from ... 
import LOGGER from ...gui.operations import AddonQueryOp from ...gui.utils import active_window_or_mw PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" +# Modify the system path by prepending $HOME/.local/bin with sys.path.insert +sys.path.insert(0, str(Path.home() / ".local/bin")) + class TemplateManager: """Manages LLM template operations and caching.""" @@ -40,22 +45,23 @@ class TemplateManager: def initialize(cls) -> None: """Initialize the template manager by finding the templates directory.""" try: + script_path = Path(__file__).parent / "get_templates_path.sh" result = subprocess.run( - ["uv", "run", "--no-project", "llm", "templates", "path"], + [str(script_path)], capture_output=True, text=True, check=True, ) cls._templates_path = Path(result.stdout.strip()) - print(f"Templates directory: {cls._templates_path}") + LOGGER.info("Templates directory found", path=str(cls._templates_path)) # After finding templates path, try to copy local templates cls._copy_local_templates() except subprocess.CalledProcessError as e: - print(f"Error finding templates directory: {e.stderr}") + LOGGER.error("Error finding templates directory", error=str(e.stderr)) cls._templates_path = None except Exception as e: - print(f"Unexpected error finding templates directory: {str(e)}") + LOGGER.error("Unexpected error finding templates directory", error=str(e)) cls._templates_path = None @classmethod @@ -72,12 +78,12 @@ def _copy_local_templates(cls) -> None: for template_file in cls._local_templates_dir.glob("*.yaml"): target_path = cls._templates_path / template_file.name if not target_path.exists(): - print(f"Copying template: {template_file.name}") + LOGGER.info("Copying template", template=template_file.name) target_path.write_text(template_file.read_text()) else: - print(f"Template already exists: {template_file.name}") + LOGGER.info("Template already exists", template=template_file.name) except Exception as e: - print(f"Error copying templates: {str(e)}") + LOGGER.error("Error 
copying templates", error=str(e)) @classmethod def get_templates_path(cls): @@ -265,8 +271,9 @@ def _on_execute(self) -> None: def _check_and_install_uv() -> None: """Check if uv is installed and install it if not.""" try: - subprocess.run(["uv", "version"], capture_output=True, check=True) - print("uv is installed") + script_path = Path(__file__).parent / "check_uv.sh" + subprocess.run([str(script_path)], capture_output=True, check=True) + LOGGER.info("uv is installed") except (subprocess.CalledProcessError, FileNotFoundError): try: if platform.system() == "Windows": @@ -287,8 +294,9 @@ def _install_llm() -> None: """Install llm and additional providers using uv if not already installed.""" # TODO Prompt users to set up their API keys. try: - subprocess.run(["llm", "--version"], capture_output=True, check=True) - print("llm is already installed") + check_llm_script = Path(__file__).parent / "check_llm.sh" + subprocess.run([str(check_llm_script)], capture_output=True, check=True) + LOGGER.info("llm is already installed") except (subprocess.CalledProcessError, FileNotFoundError): try: if platform.system() == "Windows": @@ -304,7 +312,7 @@ def _install_llm() -> None: subprocess.run([str(script_path)], check=True) tooltip("Successfully installed llm") except subprocess.CalledProcessError as e: - showWarning(f"Failed to install llm: {str(e)}") + showWarning(f"Failed to install uv: {str(e)}") def setup() -> None: @@ -511,32 +519,11 @@ def _execute_prompt_template( try: # Run the LLM command with the template and note content - import shlex - - escaped_content = shlex.quote(note_content) note_schema = json.dumps([{field: "string" for field in editor.note.keys()}]) - - cmd = [ - "uv", - "run", - "--no-project", - "llm", - "-m", - "gpt-4o", - "--no-stream", - "-t", - template_name, - "-p", - "note_schema", - shlex.quote(note_schema), - escaped_content, - "-o", - "json_object", - "1", - ] + script_path = Path(__file__).parent / "execute_prompt.sh" result = subprocess.run( - 
cmd, + [str(script_path), template_name, note_schema, note_content], capture_output=True, text=True, check=True, From 1d7b0780d260a745b90f7aaaee22cbecd62b12c0 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sat, 15 Feb 2025 12:29:06 +0000 Subject: [PATCH 38/53] Make scripts excutable. --- ankihub/entry_point.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ankihub/entry_point.py b/ankihub/entry_point.py index 5a91f2bdc..0c41a13fd 100644 --- a/ankihub/entry_point.py +++ b/ankihub/entry_point.py @@ -237,6 +237,14 @@ def _general_setup(): LOGGER.info( "Set up feature flag fetching (flags will be fetched in the background)." ) + # Make all .sh files executable + for sh_file in Path(__file__).parent.rglob("*.sh"): + current_mode = sh_file.stat().st_mode + executable_mode = ( + current_mode | 0o111 + ) # Add execute permission for user, group and others + sh_file.chmod(executable_mode) + LOGGER.info("Made all .sh files executable.") if config.labs_enabled: from .labs.llm import llm From 90745e86955d2ece601818436a281e3fff2d60dc Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sat, 15 Feb 2025 12:33:20 +0000 Subject: [PATCH 39/53] Combine scripts. 
--- ankihub/labs/llm/check_llm.sh | 7 --- ankihub/labs/llm/check_uv.sh | 7 --- ankihub/labs/llm/execute_llm.sh | 18 ------- ankihub/labs/llm/execute_prompt.sh | 12 ----- ankihub/labs/llm/get_templates_path.sh | 7 --- ankihub/labs/llm/install_llm.sh | 16 ------ ankihub/labs/llm/install_uv.sh | 10 ---- ankihub/labs/llm/llm.py | 30 ++++++----- ankihub/labs/llm/llm.sh | 72 ++++++++++++++++++++++++++ 9 files changed, 90 insertions(+), 89 deletions(-) delete mode 100755 ankihub/labs/llm/check_llm.sh delete mode 100755 ankihub/labs/llm/check_uv.sh delete mode 100755 ankihub/labs/llm/execute_llm.sh delete mode 100755 ankihub/labs/llm/execute_prompt.sh delete mode 100755 ankihub/labs/llm/get_templates_path.sh delete mode 100755 ankihub/labs/llm/install_llm.sh delete mode 100755 ankihub/labs/llm/install_uv.sh create mode 100755 ankihub/labs/llm/llm.sh diff --git a/ankihub/labs/llm/check_llm.sh b/ankihub/labs/llm/check_llm.sh deleted file mode 100755 index 574080bcc..000000000 --- a/ankihub/labs/llm/check_llm.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e - -# Ensure ~/.local/bin is in PATH -export PATH="$HOME/.local/bin:$PATH" - -llm --version diff --git a/ankihub/labs/llm/check_uv.sh b/ankihub/labs/llm/check_uv.sh deleted file mode 100755 index cc995d9d4..000000000 --- a/ankihub/labs/llm/check_uv.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e - -# Ensure ~/.local/bin is in PATH -export PATH="$HOME/.local/bin:$PATH" - -uv version diff --git a/ankihub/labs/llm/execute_llm.sh b/ankihub/labs/llm/execute_llm.sh deleted file mode 100755 index 69aa7965d..000000000 --- a/ankihub/labs/llm/execute_llm.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -e - -# Ensure ~/.local/bin is in PATH -export PATH="$HOME/.local/bin:$PATH" - -# Arguments: -# $1: template_name -# $2: note_schema -# $3: note_content - -uv run --no-project llm \ - -m gpt-4o \ - --no-stream \ - -t "$1" \ - -p note_schema "$2" \ - "$3" \ - -o json_object 1 diff --git a/ankihub/labs/llm/execute_prompt.sh 
b/ankihub/labs/llm/execute_prompt.sh deleted file mode 100755 index 79734ad19..000000000 --- a/ankihub/labs/llm/execute_prompt.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -e - -# Ensure ~/.local/bin is in PATH -export PATH="$HOME/.local/bin:$PATH" - -# Arguments: -# $1: template_name -# $2: note_schema -# $3: note_content - -uv run --no-project llm -m gpt-4o --no-stream -t "$1" -p note_schema "$2" "$3" -o json_object 1 diff --git a/ankihub/labs/llm/get_templates_path.sh b/ankihub/labs/llm/get_templates_path.sh deleted file mode 100755 index fef12f933..000000000 --- a/ankihub/labs/llm/get_templates_path.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e - -# Ensure ~/.local/bin is in PATH -export PATH="$HOME/.local/bin:$PATH" - -uv run --no-project llm templates path diff --git a/ankihub/labs/llm/install_llm.sh b/ankihub/labs/llm/install_llm.sh deleted file mode 100755 index a2c3c3401..000000000 --- a/ankihub/labs/llm/install_llm.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -set -e - -# Ensure ~/.local/bin is in PATH -export PATH="$HOME/.local/bin:$PATH" - -# Install base llm package if not already installed -if ! command -v llm &> /dev/null; then - uv tool install llm -fi - -# Install additional providers -providers=("llm-gemini" "llm-perplexity" "llm-claude-3") -for provider in "${providers[@]}"; do - uv run --no-project llm install -U "$provider" -done diff --git a/ankihub/labs/llm/install_uv.sh b/ankihub/labs/llm/install_uv.sh deleted file mode 100755 index 4cbb45eb8..000000000 --- a/ankihub/labs/llm/install_uv.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -set -e - -# Ensure ~/.local/bin is in PATH -export PATH="$HOME/.local/bin:$PATH" - -# Check if uv is already installed -if ! 
command -v uv &> /dev/null; then - curl -LsSf https://astral.sh/uv/install.sh | sh -fi diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index aa5ee8ef0..4e20aadbd 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -45,9 +45,9 @@ class TemplateManager: def initialize(cls) -> None: """Initialize the template manager by finding the templates directory.""" try: - script_path = Path(__file__).parent / "get_templates_path.sh" + script_path = Path(__file__).parent / "llm.sh" result = subprocess.run( - [str(script_path)], + [str(script_path), "get_templates_path"], capture_output=True, text=True, check=True, @@ -271,8 +271,8 @@ def _on_execute(self) -> None: def _check_and_install_uv() -> None: """Check if uv is installed and install it if not.""" try: - script_path = Path(__file__).parent / "check_uv.sh" - subprocess.run([str(script_path)], capture_output=True, check=True) + script_path = Path(__file__).parent / "llm.sh" + subprocess.run([str(script_path), "check_uv"], capture_output=True, check=True) LOGGER.info("uv is installed") except (subprocess.CalledProcessError, FileNotFoundError): try: @@ -283,8 +283,8 @@ def _check_and_install_uv() -> None: check=True, ) else: # macOS and Linux - script_path = Path(__file__).parent / "install_uv.sh" - subprocess.run([str(script_path)], check=True) + script_path = Path(__file__).parent / "llm.sh" + subprocess.run([str(script_path), "install_uv"], check=True) tooltip("Successfully installed uv") except subprocess.CalledProcessError as e: showWarning(f"Failed to install uv: {str(e)}") @@ -294,8 +294,8 @@ def _install_llm() -> None: """Install llm and additional providers using uv if not already installed.""" # TODO Prompt users to set up their API keys. 
try: - check_llm_script = Path(__file__).parent / "check_llm.sh" - subprocess.run([str(check_llm_script)], capture_output=True, check=True) + script_path = Path(__file__).parent / "llm.sh" + subprocess.run([str(script_path), "check_llm"], capture_output=True, check=True) LOGGER.info("llm is already installed") except (subprocess.CalledProcessError, FileNotFoundError): try: @@ -308,8 +308,8 @@ def _install_llm() -> None: check=True, ) else: - script_path = Path(__file__).parent / "install_llm.sh" - subprocess.run([str(script_path)], check=True) + script_path = Path(__file__).parent / "llm.sh" + subprocess.run([str(script_path), "install_llm"], check=True) tooltip("Successfully installed llm") except subprocess.CalledProcessError as e: showWarning(f"Failed to install uv: {str(e)}") @@ -520,10 +520,16 @@ def _execute_prompt_template( try: # Run the LLM command with the template and note content note_schema = json.dumps([{field: "string" for field in editor.note.keys()}]) - script_path = Path(__file__).parent / "execute_prompt.sh" + script_path = Path(__file__).parent / "llm.sh" result = subprocess.run( - [str(script_path), template_name, note_schema, note_content], + [ + str(script_path), + "execute_prompt", + template_name, + note_schema, + note_content, + ], capture_output=True, text=True, check=True, diff --git a/ankihub/labs/llm/llm.sh b/ankihub/labs/llm/llm.sh new file mode 100755 index 000000000..bfc96ea47 --- /dev/null +++ b/ankihub/labs/llm/llm.sh @@ -0,0 +1,72 @@ +#!/bin/bash +set -e + +# Ensure ~/.local/bin is in PATH +export PATH="$HOME/.local/bin:$PATH" + +function check_uv() { + uv version +} + +function install_uv() { + # Install uv using the official installer + curl -LsSf https://astral.sh/uv/install.sh | sh + + # Add uv to the current PATH + export PATH="$HOME/.local/bin:$PATH" +} + +function check_llm() { + llm --version +} + +function install_llm() { + # Install llm using uv + uv tool install llm + + # Install additional providers + uv run 
--no-project llm install -U llm-gemini + uv run --no-project llm install -U llm-perplexity + uv run --no-project llm install -U llm-claude-3 +} + +function get_templates_path() { + uv run --no-project llm templates path +} + +function execute_prompt() { + # Arguments: + # $1: template_name + # $2: note_schema + # $3: note_content + uv run --no-project llm -m gpt-4o --no-stream -t "$1" -p note_schema "$2" "$3" -o json_object 1 +} + +# Main command router +cmd=$1 +shift # Remove first argument (the command) to pass remaining args to functions + +case $cmd in + "check_uv") + check_uv + ;; + "install_uv") + install_uv + ;; + "check_llm") + check_llm + ;; + "install_llm") + install_llm + ;; + "get_templates_path") + get_templates_path + ;; + "execute_prompt") + execute_prompt "$@" + ;; + *) + echo "Unknown command: $cmd" + exit 1 + ;; +esac From 44af1fee7d8681c07d625c865f0889ed21a6746e Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sat, 15 Feb 2025 12:50:23 +0000 Subject: [PATCH 40/53] Provide path to UV. 
--- ankihub/labs/llm/llm.sh | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/ankihub/labs/llm/llm.sh b/ankihub/labs/llm/llm.sh index bfc96ea47..d77114190 100755 --- a/ankihub/labs/llm/llm.sh +++ b/ankihub/labs/llm/llm.sh @@ -4,8 +4,11 @@ set -e # Ensure ~/.local/bin is in PATH export PATH="$HOME/.local/bin:$PATH" +# Define UV executable path +UV_PATH="$HOME/.local/bin/uv" + function check_uv() { - uv version + "$UV_PATH" version } function install_uv() { @@ -22,16 +25,16 @@ function check_llm() { function install_llm() { # Install llm using uv - uv tool install llm + "$UV_PATH" tool install llm # Install additional providers - uv run --no-project llm install -U llm-gemini - uv run --no-project llm install -U llm-perplexity - uv run --no-project llm install -U llm-claude-3 + "$UV_PATH" run --no-project llm install -U llm-gemini + "$UV_PATH" run --no-project llm install -U llm-perplexity + "$UV_PATH" run --no-project llm install -U llm-claude-3 } function get_templates_path() { - uv run --no-project llm templates path + "$UV_PATH" run --no-project llm templates path } function execute_prompt() { @@ -39,7 +42,7 @@ function execute_prompt() { # $1: template_name # $2: note_schema # $3: note_content - uv run --no-project llm -m gpt-4o --no-stream -t "$1" -p note_schema "$2" "$3" -o json_object 1 + "$UV_PATH" run --no-project llm -m gpt-4o --no-stream -t "$1" -p note_schema "$2" "$3" -o json_object 1 } # Main command router From b5b9b262f5c1a7dba4739c515e3c23b2567c23a7 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sat, 15 Feb 2025 12:50:34 +0000 Subject: [PATCH 41/53] utf-8. 
--- ankihub/labs/llm/llm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 4e20aadbd..203938808 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -79,11 +79,15 @@ def _copy_local_templates(cls) -> None: target_path = cls._templates_path / template_file.name if not target_path.exists(): LOGGER.info("Copying template", template=template_file.name) - target_path.write_text(template_file.read_text()) + target_path.write_text( + template_file.read_text(encoding="utf-8"), encoding="utf-8" + ) else: LOGGER.info("Template already exists", template=template_file.name) except Exception as e: - LOGGER.error("Error copying templates", error=str(e)) + LOGGER.error( + "Error copying templates", error=str(e), path=str(cls._templates_path) + ) @classmethod def get_templates_path(cls): From 185ffbeda3b6a2ed8e1ac340b5e04465447e36fa Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sat, 15 Feb 2025 13:20:17 +0000 Subject: [PATCH 42/53] utf-8 --- ankihub/labs/llm/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 203938808..178abb4be 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -108,7 +108,7 @@ def get_template_content(cls, template_name: str) -> str: return "Template file not found" try: - return template_file.read_text() + return template_file.read_text(encoding="utf-8") except Exception as e: return f"Error reading template: {str(e)}" From bc5957b95d786478f8fc7272ebe2f4860939c962 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sat, 15 Feb 2025 13:40:26 +0000 Subject: [PATCH 43/53] FIx secrets dialog. 
--- ankihub/labs/llm/llm.sh | 7 +++++++ ankihub/labs/secrets/dialog.py | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.sh b/ankihub/labs/llm/llm.sh index d77114190..5ddcad2b9 100755 --- a/ankihub/labs/llm/llm.sh +++ b/ankihub/labs/llm/llm.sh @@ -37,6 +37,10 @@ function get_templates_path() { "$UV_PATH" run --no-project llm templates path } +function get_keys_path() { + "$UV_PATH" run --no-project llm keys path +} + function execute_prompt() { # Arguments: # $1: template_name @@ -65,6 +69,9 @@ case $cmd in "get_templates_path") get_templates_path ;; + "get_keys_path") + get_keys_path + ;; "execute_prompt") execute_prompt "$@" ;; diff --git a/ankihub/labs/secrets/dialog.py b/ankihub/labs/secrets/dialog.py index 950659c87..4f07f4cfe 100644 --- a/ankihub/labs/secrets/dialog.py +++ b/ankihub/labs/secrets/dialog.py @@ -29,8 +29,9 @@ def __init__(self): def _get_keys_file_path(self) -> Path: """Get the path to the keys.json file.""" try: + script_path = Path(__file__).parent.parent / "llm" / "llm.sh" result = subprocess.run( - ["uv", "run", "--no-project", "llm", "keys", "path"], + [str(script_path), "get_keys_path"], capture_output=True, text=True, check=True, From 57e5d47b871f292292f5b189f2903d30340f039b Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sat, 15 Feb 2025 16:12:53 +0000 Subject: [PATCH 44/53] Pass and update only the first field. 
--- ankihub/labs/llm/llm.py | 51 +++++++------------ ankihub/labs/llm/llm.sh | 2 +- ...rove.yaml => anki-improve-cloze-note.yaml} | 22 ++++---- 3 files changed, 29 insertions(+), 46 deletions(-) rename ankihub/labs/llm/prompt_templates/{anki-improve.yaml => anki-improve-cloze-note.yaml} (60%) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 178abb4be..a0ba134d9 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -425,15 +425,6 @@ def _create_diff_html(original: str, suggested: str) -> str: def _show_llm_response(editor: Editor, response: str) -> None: """Display the LLM response in a dialog with option to update note.""" - try: - suggested_fields = json.loads(response) - if not isinstance(suggested_fields, dict): - showWarning("Invalid response format. Expected a JSON object.") - return - except json.JSONDecodeError: - showWarning("Invalid JSON response from LLM") - return - dialog = QDialog(editor.parentWindow) dialog.setWindowTitle("LLM Response - Field Changes") dialog.setMinimumWidth(800) @@ -445,7 +436,7 @@ def _show_llm_response(editor: Editor, response: str) -> None: text_edit = QTextEdit() text_edit.setReadOnly(True) - # Build HTML content showing diffs for each field + # Build HTML content showing diff for the first field html_content = [ "", ] + # Get the first field's name and content note = editor.note - for field_name, suggested_content in suggested_fields.items(): - if field_name in note: - original_content = note[field_name] - html_content.append(f'
{field_name}:
') - html_content.append('
') - html_content.append(_create_diff_html(original_content, suggested_content)) - html_content.append("
") + if note and note.keys(): + field_name = note.keys()[0] + original_content = note[field_name] + html_content.append(f'
{field_name}:
') + html_content.append('
') + html_content.append(_create_diff_html(original_content, response)) + html_content.append("
") text_edit.setHtml("".join(html_content)) layout.addWidget(text_edit) @@ -486,22 +478,16 @@ def _show_llm_response(editor: Editor, response: str) -> None: def _handle_update_note(editor: Editor, response: str, dialog: QDialog) -> None: """Handle the update note button click.""" - dialog.accept() try: - # Parse the JSON response - new_fields = json.loads(response) - if not isinstance(new_fields, dict): - showWarning("Invalid response format. Expected a JSON object.") - return - - # Update the note - _update_note_fields(editor, new_fields) - tooltip("Note updated successfully") - - except json.JSONDecodeError: - showWarning("Invalid JSON response from LLM") + note = editor.note + if note and note.keys(): + # Update only the first field + field_name = note.keys()[0] + note[field_name] = response + editor.loadNoteKeepingFocus() + tooltip("Note updated successfully") except Exception as e: showWarning(f"Error updating note: {str(e)}") @@ -517,13 +503,13 @@ def _execute_prompt_template( ) -> None: """Execute the selected prompt template with the current note as input.""" note_content = _get_note_content(editor) + text_field = editor.note.items()[0][1] if not note_content: tooltip("No note content available") return try: # Run the LLM command with the template and note content - note_schema = json.dumps([{field: "string" for field in editor.note.keys()}]) script_path = Path(__file__).parent / "llm.sh" result = subprocess.run( @@ -531,8 +517,7 @@ def _execute_prompt_template( str(script_path), "execute_prompt", template_name, - note_schema, - note_content, + text_field, ], capture_output=True, text=True, diff --git a/ankihub/labs/llm/llm.sh b/ankihub/labs/llm/llm.sh index 5ddcad2b9..4fc633a29 100755 --- a/ankihub/labs/llm/llm.sh +++ b/ankihub/labs/llm/llm.sh @@ -46,7 +46,7 @@ function execute_prompt() { # $1: template_name # $2: note_schema # $3: note_content - "$UV_PATH" run --no-project llm -m gpt-4o --no-stream -t "$1" -p note_schema "$2" "$3" -o json_object 1 + 
"$UV_PATH" run --no-project llm -m gpt-4o --no-stream -t "$1" "$2" } # Main command router diff --git a/ankihub/labs/llm/prompt_templates/anki-improve.yaml b/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml similarity index 60% rename from ankihub/labs/llm/prompt_templates/anki-improve.yaml rename to ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml index 6108eb7db..763308ea6 100644 --- a/ankihub/labs/llm/prompt_templates/anki-improve.yaml +++ b/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml @@ -1,15 +1,12 @@ +model: gpt-4o system: | - You are an expert at creating effective flashcards. Analyze the provided note and respond with an improved note with the following JSON format schema: - - $note_schema - + You are an expert at creating effective flashcards. Analyze the provided cloze deletion note and respond with an improved note according to the users instructions Follow these guidelines: - 1. Use cloze deletions to hide key information, typically one or two words at a time. - 2. Use the {{c1::hidden text}} format for cloze deletions. You can also use {{c2::hidden text}}, {{c3::hidden text}}, and so on. This will result in multiple cards being created for the note by Anki. Cloze deletions with the same number will be hidden on the same card, while all the other cloze deletions won't be used for this card. - 3. Most notes should only have c1, notes with c3 or more should be rare. - 4. Most notes should only have one cloze deletion. Focus on the highest yield concepts and facts. "high-yield" means the information is most important or most frequently tested on exams. - 5. Create focused cards which tests a single concept or fact. - 6. Ensure the remaining context provides enough information for the user to recall the hidden content. + - Use cloze deletions to hide key information, typically one or two words at a time. + - Use the {{c1::hidden text}} format for cloze deletions. 
You can also use {{c2::hidden text}}, {{c3::hidden text}}, and so on. This will result in multiple cards being created for the note by Anki. Cloze deletions with the same number will be hidden on the same card, while all the other cloze deletions won't be used for this card. + - Most notes should only have one cloze deletion. Focus on the highest yield concepts and facts. "high-yield" means the information is most important or most frequently tested on exams. + - Create focused cards which tests a single concept or fact. + - Ensure the remaining context provides enough information for the user to recall the hidden content. Here are some examples of well-constructed cloze deletion notes: - Persistent {{c1::infection}} is the most common symptom of chronic sinopulmonary disease @@ -24,9 +21,10 @@ system: | - The posterior lobe of the pituitary gland is derived from {{c1::neural}} tissue. - The skeleton protects {{c1::internal organs}} and stores minerals like {{c2::calcium}} and {{c2::phosphate}}. - Improve the provided cloze deletion note based on the instructions above. Ensure that the card is clear, and effective for learning and recall. + Improve the provided cloze deletion note based on the instructions above. Ensure that the card is clear, and effective for learning and recall. Respond only with the improved note. Do not provide any pre-amble, explanation, or surrounding xml tags. prompt: | Here is the card to improve: + $input -model: gpt-4o + From 3c6d2b88074d082a6a11676874f86827cc64d596 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sat, 15 Feb 2025 16:54:48 +0000 Subject: [PATCH 45/53] Template. 
--- ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml b/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml index 763308ea6..4927c370c 100644 --- a/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml +++ b/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml @@ -1,6 +1,6 @@ model: gpt-4o system: | - You are an expert at creating effective flashcards. Analyze the provided cloze deletion note and respond with an improved note according to the users instructions + You are an expert at creating effective flashcards. Analyze the provided cloze deletion note and respond with an improved note according to the users instructions. Follow these guidelines: - Use cloze deletions to hide key information, typically one or two words at a time. - Use the {{c1::hidden text}} format for cloze deletions. You can also use {{c2::hidden text}}, {{c3::hidden text}}, and so on. This will result in multiple cards being created for the note by Anki. Cloze deletions with the same number will be hidden on the same card, while all the other cloze deletions won't be used for this card. From f29e542d0d43c373cafd6bd84ce4e676cc033674 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Sun, 16 Feb 2025 14:40:46 +0000 Subject: [PATCH 46/53] Be sure to install providers. 
--- ankihub/labs/llm/llm.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index a0ba134d9..64e7d69b8 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -305,18 +305,33 @@ def _install_llm() -> None: try: if platform.system() == "Windows": subprocess.run(["uv", "tool", "install", "llm"], check=True) - providers = ["llm-gemini", "llm-perplexity", "llm-claude-3"] - for provider in providers: - subprocess.run( - ["uv", "run", "--no-project", "llm", "install", "-U", provider], - check=True, - ) else: script_path = Path(__file__).parent / "llm.sh" subprocess.run([str(script_path), "install_llm"], check=True) tooltip("Successfully installed llm") + LOGGER.info("Successfully installed llm") except subprocess.CalledProcessError as e: - showWarning(f"Failed to install uv: {str(e)}") + showWarning(f"Failed to install llm: {str(e)}") + LOGGER.error("Failed to install llm", error=str(e)) + return + + # Install providers regardless of llm installation status + try: + if platform.system() == "Windows": + providers = ["llm-gemini", "llm-perplexity", "llm-claude-3"] + for provider in providers: + subprocess.run( + ["uv", "run", "--no-project", "llm", "install", "-U", provider], + check=True, + ) + else: + script_path = Path(__file__).parent / "llm.sh" + subprocess.run([str(script_path), "install_providers"], check=True) + tooltip("Successfully installed llm providers") + LOGGER.info("Successfully installed llm providers") + except subprocess.CalledProcessError as e: + showWarning(f"Failed to install llm providers: {str(e)}") + LOGGER.error("Failed to install llm providers", error=str(e)) def setup() -> None: From ed2534460f49363f8e84ae478d203eb1282c6f33 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Mon, 17 Feb 2025 12:40:26 +0100 Subject: [PATCH 47/53] fix: Add install_providers cmd, don't make UI changes in background --- ankihub/labs/llm/llm.py | 43 
++++++++++++++++++----------------------- ankihub/labs/llm/llm.sh | 5 +++++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 64e7d69b8..9a1e3b267 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -31,6 +31,8 @@ PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" +LLM_SCRIPT_PATH = Path(__file__).parent / "llm.sh" + # Modify the system path by prepending $HOME/.local/bin with sys.path.insert sys.path.insert(0, str(Path.home() / ".local/bin")) @@ -45,9 +47,8 @@ class TemplateManager: def initialize(cls) -> None: """Initialize the template manager by finding the templates directory.""" try: - script_path = Path(__file__).parent / "llm.sh" result = subprocess.run( - [str(script_path), "get_templates_path"], + [str(LLM_SCRIPT_PATH), "get_templates_path"], capture_output=True, text=True, check=True, @@ -275,8 +276,9 @@ def _on_execute(self) -> None: def _check_and_install_uv() -> None: """Check if uv is installed and install it if not.""" try: - script_path = Path(__file__).parent / "llm.sh" - subprocess.run([str(script_path), "check_uv"], capture_output=True, check=True) + subprocess.run( + [str(LLM_SCRIPT_PATH), "check_uv"], capture_output=True, check=True + ) LOGGER.info("uv is installed") except (subprocess.CalledProcessError, FileNotFoundError): try: @@ -287,33 +289,30 @@ def _check_and_install_uv() -> None: check=True, ) else: # macOS and Linux - script_path = Path(__file__).parent / "llm.sh" - subprocess.run([str(script_path), "install_uv"], check=True) - tooltip("Successfully installed uv") + subprocess.run([str(LLM_SCRIPT_PATH), "install_uv"], check=True) except subprocess.CalledProcessError as e: - showWarning(f"Failed to install uv: {str(e)}") + LOGGER.warning("Failed to install uv", error=str(e)) + raise e def _install_llm() -> None: """Install llm and additional providers using uv if not already installed.""" # TODO Prompt users to set up their API keys. 
try: - script_path = Path(__file__).parent / "llm.sh" - subprocess.run([str(script_path), "check_llm"], capture_output=True, check=True) + subprocess.run( + [str(LLM_SCRIPT_PATH), "check_llm"], capture_output=True, check=True + ) LOGGER.info("llm is already installed") except (subprocess.CalledProcessError, FileNotFoundError): try: if platform.system() == "Windows": subprocess.run(["uv", "tool", "install", "llm"], check=True) else: - script_path = Path(__file__).parent / "llm.sh" - subprocess.run([str(script_path), "install_llm"], check=True) - tooltip("Successfully installed llm") + subprocess.run([str(LLM_SCRIPT_PATH), "install_llm"], check=True) LOGGER.info("Successfully installed llm") except subprocess.CalledProcessError as e: - showWarning(f"Failed to install llm: {str(e)}") - LOGGER.error("Failed to install llm", error=str(e)) - return + LOGGER.warning("Failed to install llm", error=str(e)) + raise e # Install providers regardless of llm installation status try: @@ -325,13 +324,11 @@ def _install_llm() -> None: check=True, ) else: - script_path = Path(__file__).parent / "llm.sh" - subprocess.run([str(script_path), "install_providers"], check=True) - tooltip("Successfully installed llm providers") + subprocess.run([str(LLM_SCRIPT_PATH), "install_providers"], check=True) LOGGER.info("Successfully installed llm providers") except subprocess.CalledProcessError as e: - showWarning(f"Failed to install llm providers: {str(e)}") - LOGGER.error("Failed to install llm providers", error=str(e)) + LOGGER.warning("Failed to install llm providers", error=str(e)) + raise e def setup() -> None: @@ -525,11 +522,9 @@ def _execute_prompt_template( try: # Run the LLM command with the template and note content - script_path = Path(__file__).parent / "llm.sh" - result = subprocess.run( [ - str(script_path), + str(LLM_SCRIPT_PATH), "execute_prompt", template_name, text_field, diff --git a/ankihub/labs/llm/llm.sh b/ankihub/labs/llm/llm.sh index 4fc633a29..267413786 100755 --- 
a/ankihub/labs/llm/llm.sh +++ b/ankihub/labs/llm/llm.sh @@ -26,7 +26,9 @@ function check_llm() { function install_llm() { # Install llm using uv "$UV_PATH" tool install llm +} +function install_providers(){ # Install additional providers "$UV_PATH" run --no-project llm install -U llm-gemini "$UV_PATH" run --no-project llm install -U llm-perplexity @@ -66,6 +68,9 @@ case $cmd in "install_llm") install_llm ;; + "install_providers") + install_providers + ;; "get_templates_path") get_templates_path ;; From 60c7286df656e12d0d660c7e696a1aaa5144e112 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Mon, 17 Feb 2025 12:47:34 +0100 Subject: [PATCH 48/53] fix: Remove unused note_schema argument --- ankihub/labs/llm/llm.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ankihub/labs/llm/llm.sh b/ankihub/labs/llm/llm.sh index 267413786..609c10518 100755 --- a/ankihub/labs/llm/llm.sh +++ b/ankihub/labs/llm/llm.sh @@ -46,8 +46,7 @@ function get_keys_path() { function execute_prompt() { # Arguments: # $1: template_name - # $2: note_schema - # $3: note_content + # $2: note_content "$UV_PATH" run --no-project llm -m gpt-4o --no-stream -t "$1" "$2" } From eb450df5ec33c7554aa987961fdd1046ff88afe2 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Mon, 17 Feb 2025 13:20:30 +0100 Subject: [PATCH 49/53] Add "Reset template" button to prompt preview --- ankihub/labs/llm/llm.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 9a1e3b267..9212dceea 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -27,7 +27,7 @@ from ... 
import LOGGER from ...gui.operations import AddonQueryOp -from ...gui.utils import active_window_or_mw +from ...gui.utils import active_window_or_mw, ask_user PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" @@ -239,6 +239,11 @@ def _setup_ui(self) -> None: save_button.clicked.connect(self._on_save) button_layout.addWidget(save_button) + # Reset button + reset_button = QPushButton("Reset Template") + reset_button.clicked.connect(self._on_reset) + button_layout.addWidget(reset_button) + # Cancel button cancel_button = QPushButton("Cancel") cancel_button.clicked.connect(self.reject) @@ -272,6 +277,30 @@ def _on_execute(self) -> None: self.accept() _execute_prompt_template(self.editor, self.template_name, modified_content) + def _update_editors(self) -> None: + """Update the YAML editors with the current yaml_data.""" + for key, editor in self.section_editors.items(): + if key in self.yaml_data: + editor.setPlainText(str(self.yaml_data[key])) + else: + editor.setPlainText("") + + def _on_reset(self) -> None: + """Reset the template to the version in the local templates directory.""" + if ask_user( + "Are you sure you want to reset the template to the original version?" 
+ ): + local_template_path = ( + TemplateManager._local_templates_dir / f"{self.template_name}.yaml" + ) + self.template_content = local_template_path.read_text(encoding="utf-8") + self.yaml_data = yaml.safe_load(self.template_content) + if not isinstance(self.yaml_data, dict): + self.yaml_data = {} + self._update_editors() # Update the editors with the reset content + TemplateManager.save_template(self.template_name, self.template_content) + tooltip("Template reset.", parent=self) + def _check_and_install_uv() -> None: """Check if uv is installed and install it if not.""" From b6bf5cd2749e469818164adc22a11c88304fabd6 Mon Sep 17 00:00:00 2001 From: RisingOrange Date: Mon, 17 Feb 2025 13:38:50 +0100 Subject: [PATCH 50/53] Don't reinstall providers every time --- ankihub/labs/llm/llm.py | 41 ++++++++++++++++++++++++++++------------- ankihub/labs/llm/llm.sh | 12 +++++------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 9212dceea..873c18b62 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -343,21 +343,36 @@ def _install_llm() -> None: LOGGER.warning("Failed to install llm", error=str(e)) raise e - # Install providers regardless of llm installation status + # Check installed providers try: - if platform.system() == "Windows": - providers = ["llm-gemini", "llm-perplexity", "llm-claude-3"] - for provider in providers: - subprocess.run( - ["uv", "run", "--no-project", "llm", "install", "-U", provider], - check=True, - ) - else: - subprocess.run([str(LLM_SCRIPT_PATH), "install_providers"], check=True) - LOGGER.info("Successfully installed llm providers") + result = subprocess.run( + ["llm", "plugins"], capture_output=True, text=True, check=True + ) + installed_plugins = {plugin["name"] for plugin in json.loads(result.stdout)} except subprocess.CalledProcessError as e: - LOGGER.warning("Failed to install llm providers", error=str(e)) - raise e + LOGGER.warning("Failed to check 
installed providers", error=str(e)) + installed_plugins = set() + + # Install providers if not already installed + providers = ["llm-gemini", "llm-perplexity", "llm-anthropic"] + for provider in providers: + if provider in installed_plugins: + LOGGER.info(f"{provider} is already installed") + else: + try: + if platform.system() == "Windows": + subprocess.run( + ["uv", "run", "--no-project", "llm", "install", "-U", provider], + check=True, + ) + else: + subprocess.run( + [str(LLM_SCRIPT_PATH), "install_provider", provider], check=True + ) + LOGGER.info(f"Successfully installed {provider}") + except subprocess.CalledProcessError as e: + LOGGER.warning(f"Failed to install {provider}", error=str(e)) + raise e def setup() -> None: diff --git a/ankihub/labs/llm/llm.sh b/ankihub/labs/llm/llm.sh index 609c10518..0be23ef88 100755 --- a/ankihub/labs/llm/llm.sh +++ b/ankihub/labs/llm/llm.sh @@ -28,11 +28,9 @@ function install_llm() { "$UV_PATH" tool install llm } -function install_providers(){ - # Install additional providers - "$UV_PATH" run --no-project llm install -U llm-gemini - "$UV_PATH" run --no-project llm install -U llm-perplexity - "$UV_PATH" run --no-project llm install -U llm-claude-3 +function install_provider(){ + echo "Installing provider: '$1'" + "$UV_PATH" run --no-project llm install -U "$1" } function get_templates_path() { @@ -67,8 +65,8 @@ case $cmd in "install_llm") install_llm ;; - "install_providers") - install_providers + "install_provider") + install_provider "$@" ;; "get_templates_path") get_templates_path From aef1b5cb9a47733315ceb18ad6785d1429083645 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Fri, 28 Feb 2025 08:55:44 +0000 Subject: [PATCH 51/53] Tweak template. 
--- .../llm/prompt_templates/anki-improve-cloze-note.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml b/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml index 4927c370c..dc60cd27b 100644 --- a/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml +++ b/ankihub/labs/llm/prompt_templates/anki-improve-cloze-note.yaml @@ -2,12 +2,18 @@ model: gpt-4o system: | You are an expert at creating effective flashcards. Analyze the provided cloze deletion note and respond with an improved note according to the users instructions. Follow these guidelines: - - Use cloze deletions to hide key information, typically one or two words at a time. + - Use cloze deletions to hide key information. - Use the {{c1::hidden text}} format for cloze deletions. You can also use {{c2::hidden text}}, {{c3::hidden text}}, and so on. This will result in multiple cards being created for the note by Anki. Cloze deletions with the same number will be hidden on the same card, while all the other cloze deletions won't be used for this card. - Most notes should only have one cloze deletion. Focus on the highest yield concepts and facts. "high-yield" means the information is most important or most frequently tested on exams. - Create focused cards which tests a single concept or fact. - Ensure the remaining context provides enough information for the user to recall the hidden content. + ## Common Mistakes + Avoid these common mistakes when creating cloze deletion cards: + - Occluding too much information, making the card impossible to answer. + - E.g., "The {{c1::sky}} is {{c1::blue}}." This card is impossible to answer because both "sky" and "blue" are occluded. 
+ + Here are some examples of well-constructed cloze deletion notes: - Persistent {{c1::infection}} is the most common symptom of chronic sinopulmonary disease - Chronic sinopulmonary disease commonly presents with chronic {{c1::cough}} and {{c1::sputum}} production. From b1a6259ed81f5cc258e6d73b41d48bcd80e1d998 Mon Sep 17 00:00:00 2001 From: Andrew Sanchez Date: Fri, 28 Feb 2025 09:15:23 +0000 Subject: [PATCH 52/53] Handle error on user defined templates. --- ankihub/labs/llm/llm.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 873c18b62..1cc635715 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -293,13 +293,29 @@ def _on_reset(self) -> None: local_template_path = ( TemplateManager._local_templates_dir / f"{self.template_name}.yaml" ) - self.template_content = local_template_path.read_text(encoding="utf-8") - self.yaml_data = yaml.safe_load(self.template_content) - if not isinstance(self.yaml_data, dict): - self.yaml_data = {} - self._update_editors() # Update the editors with the reset content - TemplateManager.save_template(self.template_name, self.template_content) - tooltip("Template reset.", parent=self) + + if not local_template_path.exists(): + QMessageBox.warning( + self, + "Template Not Found", + f"No local template found for {self.template_name}. 
Cannot reset to original version.", + ) + return + + try: + self.template_content = local_template_path.read_text(encoding="utf-8") + self.yaml_data = yaml.safe_load(self.template_content) + if not isinstance(self.yaml_data, dict): + self.yaml_data = {} + self._update_editors() # Update the editors with the reset content + TemplateManager.save_template(self.template_name, self.template_content) + tooltip("Template reset.", parent=self) + except Exception as e: + QMessageBox.critical( + self, + "Reset Failed", + f"Failed to reset template: {str(e)}", + ) def _check_and_install_uv() -> None: From ee5b09c65551657332e0bca141572bd5ae5b3be4 Mon Sep 17 00:00:00 2001 From: Abdo Date: Mon, 17 Mar 2025 17:28:29 +0300 Subject: [PATCH 53/53] Add llm.ps1 for Windows --- ankihub/labs/llm/llm.ps1 | 68 +++++++++++++++++++++++++++++++++ ankihub/labs/llm/llm.py | 69 +++++++++++++++------------------- ankihub/labs/secrets/dialog.py | 11 ++---- 3 files changed, 101 insertions(+), 47 deletions(-) create mode 100644 ankihub/labs/llm/llm.ps1 diff --git a/ankihub/labs/llm/llm.ps1 b/ankihub/labs/llm/llm.ps1 new file mode 100644 index 000000000..f9491e953 --- /dev/null +++ b/ankihub/labs/llm/llm.ps1 @@ -0,0 +1,68 @@ +# Ensure ~/.local/bin is in PATH +$env:PATH = "$HOME\.local\bin;$env:PATH" + +# Define UV executable path +$UV_PATH = "$HOME\.local\bin\uv" + +function Check-Uv { + & "$UV_PATH" version +} + +function Install-Uv { + # Install uv using the official installer + Invoke-RestMethod https://astral.sh/uv/install.ps1 | Invoke-Expression + + # Add uv to the current PATH + $env:PATH = "$HOME\.local\bin;$env:PATH" +} + +function Check-Llm { + & llm --version +} + +function Install-Llm { + # Install llm using uv + & "$UV_PATH" tool install llm +} + +function Install-Provider { + param ( + [string]$provider + ) + Write-Output "Installing provider: '$provider'" + & "$UV_PATH" run --no-project llm install -U $provider +} + +function Get-Templates-Path { + & "$UV_PATH" run --no-project llm 
templates path +} + +function Get-Keys-Path { + & "$UV_PATH" run --no-project llm keys path +} + +function Execute-Prompt { + param ( + [string]$templateName, + [string]$noteContent + ) + & "$UV_PATH" run --no-project llm -m gpt-4o --no-stream -t "$templateName" "$noteContent" +} + +$cmd = $args[0] +$args = $args[1..$args.Length] + +switch ($cmd) { + "check_uv" { Check-Uv } + "install_uv" { Install-Uv } + "check_llm" { Check-Llm } + "install_llm" { Install-Llm } + "install_provider" { Install-Provider -provider $args[0] } + "get_templates_path" { Get-Templates-Path } + "get_keys_path" { Get-Keys-Path } + "execute_prompt" { Execute-Prompt -templateName $args[0] -noteContent $args[1] } + default { + Write-Output "Unknown command: $cmd" + exit 1 + } +} diff --git a/ankihub/labs/llm/llm.py b/ankihub/labs/llm/llm.py index 1cc635715..bfd1d19d4 100644 --- a/ankihub/labs/llm/llm.py +++ b/ankihub/labs/llm/llm.py @@ -3,6 +3,7 @@ import difflib import json import platform +import shutil import subprocess import sys from pathlib import Path @@ -31,12 +32,30 @@ PROMPT_SELECTOR_BTN_ID = "ankihub-btn-llm-prompt" -LLM_SCRIPT_PATH = Path(__file__).parent / "llm.sh" - # Modify the system path by prepending $HOME/.local/bin with sys.path.insert sys.path.insert(0, str(Path.home() / ".local/bin")) +def run_llm_script(command: str, *params: Any) -> str: + script_path = ( + Path(__file__).parent + / f"llm.{'ps1' if platform.system() == 'Windows' else 'sh'}" + ) + if platform.system() == "Windows": + args = [shutil.which("powershell"), "-File", str(script_path)] + else: + args = [str(script_path)] + args.extend([command, *params]) + result = subprocess.run( + args, + capture_output=True, + text=True, + check=True, + ) + + return result.stdout + + class TemplateManager: """Manages LLM template operations and caching.""" @@ -47,13 +66,8 @@ class TemplateManager: def initialize(cls) -> None: """Initialize the template manager by finding the templates directory.""" try: - result = 
subprocess.run( - [str(LLM_SCRIPT_PATH), "get_templates_path"], - capture_output=True, - text=True, - check=True, - ) - cls._templates_path = Path(result.stdout.strip()) + result = run_llm_script("get_templates_path") + cls._templates_path = Path(result.strip()) LOGGER.info("Templates directory found", path=str(cls._templates_path)) # After finding templates path, try to copy local templates @@ -321,20 +335,11 @@ def _on_reset(self) -> None: def _check_and_install_uv() -> None: """Check if uv is installed and install it if not.""" try: - subprocess.run( - [str(LLM_SCRIPT_PATH), "check_uv"], capture_output=True, check=True - ) + run_llm_script("check_uv") LOGGER.info("uv is installed") except (subprocess.CalledProcessError, FileNotFoundError): try: - if platform.system() == "Windows": - subprocess.run( - 'powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"', - shell=True, - check=True, - ) - else: # macOS and Linux - subprocess.run([str(LLM_SCRIPT_PATH), "install_uv"], check=True) + run_llm_script("install_uv") except subprocess.CalledProcessError as e: LOGGER.warning("Failed to install uv", error=str(e)) raise e @@ -344,16 +349,14 @@ def _install_llm() -> None: """Install llm and additional providers using uv if not already installed.""" # TODO Prompt users to set up their API keys. 
try: - subprocess.run( - [str(LLM_SCRIPT_PATH), "check_llm"], capture_output=True, check=True - ) + run_llm_script("check_llm") LOGGER.info("llm is already installed") except (subprocess.CalledProcessError, FileNotFoundError): try: if platform.system() == "Windows": subprocess.run(["uv", "tool", "install", "llm"], check=True) else: - subprocess.run([str(LLM_SCRIPT_PATH), "install_llm"], check=True) + run_llm_script("install_llm") LOGGER.info("Successfully installed llm") except subprocess.CalledProcessError as e: LOGGER.warning("Failed to install llm", error=str(e)) @@ -382,9 +385,7 @@ def _install_llm() -> None: check=True, ) else: - subprocess.run( - [str(LLM_SCRIPT_PATH), "install_provider", provider], check=True - ) + run_llm_script("install_provider", provider) LOGGER.info(f"Successfully installed {provider}") except subprocess.CalledProcessError as e: LOGGER.warning(f"Failed to install {provider}", error=str(e)) @@ -582,20 +583,10 @@ def _execute_prompt_template( try: # Run the LLM command with the template and note content - result = subprocess.run( - [ - str(LLM_SCRIPT_PATH), - "execute_prompt", - template_name, - text_field, - ], - capture_output=True, - text=True, - check=True, - ) + result = run_llm_script("execute_prompt", template_name, text_field) # Show the response in a dialog - _show_llm_response(editor, result.stdout) + _show_llm_response(editor, result) except subprocess.CalledProcessError as e: error_msg = f"Error running LLM command: {e.stderr}" diff --git a/ankihub/labs/secrets/dialog.py b/ankihub/labs/secrets/dialog.py index 4f07f4cfe..22ba0ab57 100644 --- a/ankihub/labs/secrets/dialog.py +++ b/ankihub/labs/secrets/dialog.py @@ -8,6 +8,7 @@ from aqt.utils import showWarning, tooltip from ...gui.utils import active_window_or_mw +from ..llm.llm import run_llm_script class SecretsDialog(QDialog): @@ -29,14 +30,8 @@ def __init__(self): def _get_keys_file_path(self) -> Path: """Get the path to the keys.json file.""" try: - script_path = 
Path(__file__).parent.parent / "llm" / "llm.sh" - result = subprocess.run( - [str(script_path), "get_keys_path"], - capture_output=True, - text=True, - check=True, - ) - return Path(result.stdout.strip()) + result = run_llm_script("get_keys_path") + return Path(result.strip()) except subprocess.CalledProcessError as e: raise Exception( "Failed to get LLM keys path. Please run 'llm setup' in your terminal first.\n\n"