diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 7a3ed76..8891100 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,9 +4,10 @@ updates: directory: "/" schedule: interval: "monthly" + ignore: + # tree_sitter no longer supports compilation, but most packages are not on PyPI yet (#29) + - dependency-name: "tree_sitter" groups: python-packages: patterns: - "*" - exclude-patterns: - - "tree_sitter" diff --git a/Makefile b/Makefile index cf4fe3e..98f74cb 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,10 @@ install: pip install . install-development: - pip install -e .[dev] + pip install -e .[dev,transformers] install-test: - pip install .[dev] + pip install .[dev,transformers] uninstall: pip uninstall textLSP diff --git a/README.md b/README.md index 2d9bf34..ceb7433 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,14 @@ # textLSP + Language server for text spell and grammar check with various AI tools. _This tool is in early development._ ![textLSP](https://user-images.githubusercontent.com/414596/219856412-8095caa5-9ce6-49fe-9713-78d234837ac4.png) -# Features +## Features -## LSP features +### LSP features * Diagnostics: * spelling or grammatical errors @@ -69,9 +70,6 @@ magic command (see the OpenAI analyser below). * [hf_completion](https://huggingface.co/docs/transformers/task_summary#language-modeling): Huggingface `fill-mask` pipeline based text completion. * [Gramformer](https://github.com/PrithivirajDamodaran/Gramformer): Neural network based system. - * Gramformer needs to be installed manually: - - ```pip install git+https://github.com/PrithivirajDamodaran/Gramformer.git``` ### Tools using remote services @@ -81,6 +79,9 @@ The following tools use remote text APIs. Due to potential costs turning off automatic analysis is suggested. * [OpenAI](https://openai.com/api): Supports text correction as well as text generation through a magic command in the text file. + * A custom URL can be set to use an OpenAI-compatible server. 
See the example + [configuration](#configuration) below. +
Generation showcase
@@ -93,9 +94,9 @@ Due to potential costs turning off automatic analysis is suggested. * markdown * any other file types as plain text -# Setup +## Setup -## Install +### Install ``` pip install textLSP ``` @@ -105,7 +106,20 @@ For the latest version: pip install git+https://github.com/hangyav/textLSP ``` -## Running +#### Additional dependencies +Some analyzers need additional dependencies! + +* hf_checker, hf_instruction_checker and hf_completion: +``` +pip install textLSP[transformers] +``` + +* Gramformer needs to be installed manually: +``` +pip install git+https://github.com/PrithivirajDamodaran/Gramformer.git +``` + +### Running Simply run: ``` textlsp @@ -120,7 +134,7 @@ or simply over ssh (with ssh key) if the client doesn't support it: ssh textlsp ``` -## Configuration +### Configuration Using textLSP within an editor depends on the editor of choice. For a few examples of how to set up language servers in general in some of the popular editors see [here](https://github.com/openlawlibrary/pygls/tree/master/examples/hello-world#editor-configurations) or take a look at the related documentation of your editor. 
@@ -194,6 +208,7 @@ textLSP = { openai = { enabled = false, api_key = '', + -- url = '' -- optional to use an OpenAI-compatible server check_text = { on_open = false, on_save = false, diff --git a/setup.py b/setup.py index e28fa09..24315fc 100644 --- a/setup.py +++ b/setup.py @@ -33,11 +33,8 @@ def read(fname): 'tree_sitter==0.21.3', 'gitpython==3.1.43', 'appdirs==1.4.4', - 'torch==2.3.0', 'openai==1.30.5', - 'transformers==4.41.2', 'sortedcontainers==2.4.0', - 'bitsandbytes==0.43.1', 'langdetect==1.0.9', 'ollama==0.2.0', ], @@ -47,6 +44,11 @@ def read(fname): 'python-lsp-jsonrpc==1.1.2', 'pytest-cov==5.0.0', 'coverage-threshold==0.4.4' - ] + ], + 'transformers': [ + 'torch==2.3.0', + 'transformers==4.41.2', + 'bitsandbytes==0.43.1', + ], }, ) diff --git a/tests/analysers/analyser_test.py b/tests/analysers/analyser_test.py index aaa4c40..808cbc0 100644 --- a/tests/analysers/analyser_test.py +++ b/tests/analysers/analyser_test.py @@ -330,7 +330,7 @@ def test_diagnostics_bug2(json_converter, langtool_ls_onsave): langtool_ls_onsave.notify_did_open( json_converter.unstructure(open_params) ) - assert done.wait(30) + assert done.wait(60) done.clear() change_params = DidChangeTextDocumentParams( @@ -351,7 +351,7 @@ def test_diagnostics_bug2(json_converter, langtool_ls_onsave): langtool_ls_onsave.notify_did_change( json_converter.unstructure(change_params) ) - assert done.wait(30) + assert done.wait(60) done.clear() save_params = DidSaveTextDocumentParams( @@ -362,7 +362,7 @@ def test_diagnostics_bug2(json_converter, langtool_ls_onsave): langtool_ls_onsave.notify_did_save( json_converter.unstructure(save_params) ) - assert done.wait(30) + assert done.wait(60) done.clear() change_params = DidChangeTextDocumentParams( @@ -383,7 +383,7 @@ def test_diagnostics_bug2(json_converter, langtool_ls_onsave): langtool_ls_onsave.notify_did_change( json_converter.unstructure(change_params) ) - assert done.wait(30) + assert done.wait(60) done.clear() save_params = 
DidSaveTextDocumentParams( @@ -394,7 +394,7 @@ def test_diagnostics_bug2(json_converter, langtool_ls_onsave): langtool_ls_onsave.notify_did_save( json_converter.unstructure(save_params) ) - assert done.wait(30) + assert done.wait(60) done.clear() exp_lst = [ diff --git a/textLSP/analysers/handler.py b/textLSP/analysers/handler.py index 6622815..39e5e8d 100644 --- a/textLSP/analysers/handler.py +++ b/textLSP/analysers/handler.py @@ -59,12 +59,12 @@ def update_settings(self, settings): ) except ImportError as e: self.language_server.show_message( - str(e), + f"Error ({name}): {str(e)}", MessageType.Error, ) except ConfigurationError as e: self.language_server.show_message( - str(e), + f"Error ({name}): {str(e)}", MessageType.Error, ) diff --git a/textLSP/analysers/openai/openai.py b/textLSP/analysers/openai/openai.py index 2ab9eb3..805ed67 100644 --- a/textLSP/analysers/openai/openai.py +++ b/textLSP/analysers/openai/openai.py @@ -3,20 +3,20 @@ from typing import List, Tuple, Optional from lsprotocol.types import ( - Diagnostic, - Range, - Position, - TextEdit, - CodeAction, - WorkspaceEdit, - Command, - CodeActionParams, - TextDocumentEdit, - VersionedTextDocumentIdentifier, - CompletionParams, - CompletionList, - CompletionItem, - MessageType, + Diagnostic, + Range, + Position, + TextEdit, + CodeAction, + WorkspaceEdit, + Command, + CodeActionParams, + TextDocumentEdit, + VersionedTextDocumentIdentifier, + CompletionParams, + CompletionList, + CompletionItem, + MessageType, ) from pygls.server import LanguageServer @@ -29,18 +29,20 @@ class OpenAIAnalyser(Analyser): - CONFIGURATION_API_KEY = 'api_key' - CONFIGURATION_MODEL = 'model' - CONFIGURATION_EDIT_INSTRUCTION = 'edit_instruction' - CONFIGURATION_TEMPERATURE = 'temperature' - CONFIGURATION_MAX_TOKEN = 'max_token' - CONFIGURATION_PROMPT_MAGIC = 'prompt_magic' - - SETTINGS_DEFAULT_MODEL = 'text-babbage-001' - SETTINGS_DEFAULT_EDIT_INSTRUCTION = 'Fix spelling and grammar errors.' 
+ CONFIGURATION_API_KEY = "api_key" + CONFIGURATION_URL = "url" + CONFIGURATION_MODEL = "model" + CONFIGURATION_EDIT_INSTRUCTION = "edit_instruction" + CONFIGURATION_TEMPERATURE = "temperature" + CONFIGURATION_MAX_TOKEN = "max_token" + CONFIGURATION_PROMPT_MAGIC = "prompt_magic" + + SETTINGS_DEFAULT_URL = None + SETTINGS_DEFAULT_MODEL = "text-babbage-001" + SETTINGS_DEFAULT_EDIT_INSTRUCTION = "Fix spelling and grammar errors." SETTINGS_DEFAULT_TEMPERATURE = 0 SETTINGS_DEFAULT_MAX_TOKEN = 16 - SETTINGS_DEFAULT_PROMPT_MAGIC = '%OPENAI% ' + SETTINGS_DEFAULT_PROMPT_MAGIC = "%OPENAI% " SETTINGS_DEFAULT_CHECK_ON = { Analyser.CONFIGURATION_CHECK_ON_OPEN: False, Analyser.CONFIGURATION_CHECK_ON_CHANGE: False, @@ -50,8 +52,16 @@ class OpenAIAnalyser(Analyser): def __init__(self, language_server: LanguageServer, config: dict, name: str): super().__init__(language_server, config, name) if self.CONFIGURATION_API_KEY not in self.config: - raise ConfigurationError(f'Required parameter: {name}.{self.CONFIGURATION_API_KEY}') - self._client = OpenAI(api_key=self.config[self.CONFIGURATION_API_KEY]) + raise ConfigurationError( + f"Required parameter: {name}.{self.CONFIGURATION_API_KEY}" + ) + url = self.config.get(self.CONFIGURATION_URL, self.SETTINGS_DEFAULT_URL) + if url is not None and url.lower() == "none": + url = None + self._client = OpenAI( + api_key=self.config[self.CONFIGURATION_API_KEY], + base_url=url, + ) def _chat_endpoint( self, @@ -80,16 +90,25 @@ def _chat_endpoint( def _edit(self, text) -> List[TokenDiff]: res = self._chat_endpoint( - system_msg=self.config.get(self.CONFIGURATION_EDIT_INSTRUCTION, self.SETTINGS_DEFAULT_EDIT_INSTRUCTION), + system_msg=self.config.get( + self.CONFIGURATION_EDIT_INSTRUCTION, + self.SETTINGS_DEFAULT_EDIT_INSTRUCTION, + ), user_msg=text, - model=self.config.get(self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL), - temperature=self.config.get(self.CONFIGURATION_TEMPERATURE, self.SETTINGS_DEFAULT_TEMPERATURE), + model=self.config.get( + 
self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL + ), + temperature=self.config.get( + self.CONFIGURATION_TEMPERATURE, self.SETTINGS_DEFAULT_TEMPERATURE + ), ) logger.debug(f"Response: {res}") if len(res.choices) > 0: # the API escapes special characters such as newlines - res_text = res.choices[0].message.content.strip().encode().decode("unicode_escape") + res_text = ( + res.choices[0].message.content.strip().encode().decode("unicode_escape") + ) return TokenDiff.token_level_diff(text, res_text) return [] @@ -98,19 +117,29 @@ def _generate(self, text) -> Optional[str]: res = self._chat_endpoint( system_msg=text, user_msg=None, - model=self.config.get(self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL), - temperature=self.config.get(self.CONFIGURATION_TEMPERATURE, self.SETTINGS_DEFAULT_TEMPERATURE), - max_tokens=self.config.get(self.CONFIGURATION_MAX_TOKEN, self.SETTINGS_DEFAULT_MAX_TOKEN), + model=self.config.get( + self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL + ), + temperature=self.config.get( + self.CONFIGURATION_TEMPERATURE, self.SETTINGS_DEFAULT_TEMPERATURE + ), + max_tokens=self.config.get( + self.CONFIGURATION_MAX_TOKEN, self.SETTINGS_DEFAULT_MAX_TOKEN + ), ) logger.debug(f"Response: {res}") if len(res.choices) > 0: # the API escapes special characters such as newlines - return res.choices[0].message.content.strip().encode().decode("unicode_escape") + return ( + res.choices[0].message.content.strip().encode().decode("unicode_escape") + ) return None - def _analyse(self, text, doc, offset=0) -> Tuple[List[Diagnostic], List[CodeAction]]: + def _analyse( + self, text, doc, offset=0 + ) -> Tuple[List[Diagnostic], List[CodeAction]]: diagnostics = list() code_actions = list() @@ -129,22 +158,22 @@ def _analyse(self, text, doc, offset=0) -> Tuple[List[Diagnostic], List[CodeActi for edit in edits: if edit.type == TokenDiff.INSERT: if edit.offset >= len(text): - edit.new_token = f' {edit.new_token}' + edit.new_token = f" {edit.new_token}" else: - 
edit.new_token = f' {edit.new_token} ' - edit.old_token = ' ' + edit.new_token = f" {edit.new_token} " + edit.old_token = " " edit.offset -= 1 edit.length += 1 token = edit.old_token - range = doc.range_at_offset(edit.offset+offset, edit.length, True) + range = doc.range_at_offset(edit.offset + offset, edit.length, True) range = Range( start=range.start, end=Position( line=range.end.line, - character=range.end.character+1, - ) + character=range.end.character + 1, + ), ) if edit.type == TokenDiff.INSERT: @@ -157,9 +186,9 @@ def _analyse(self, text, doc, offset=0) -> Tuple[List[Diagnostic], List[CodeActi diagnostic = Diagnostic( range=range, message=message, - source='openai', + source="openai", severity=self.get_severity(), - code=f'openai:{edit.type}', + code=f"openai:{edit.type}", ) action = self.build_single_suggestion_action( doc=doc, @@ -179,7 +208,9 @@ def _did_open(self, doc: BaseDocument): diagnostics = list() code_actions = list() checked = set() - for paragraph in doc.paragraphs_at_offset(0, len(doc.cleaned_source), cleaned=True): + for paragraph in doc.paragraphs_at_offset( + 0, len(doc.cleaned_source), cleaned=True + ): diags, actions = self._handle_paragraph(doc, paragraph) diagnostics.extend(diags) code_actions.extend(actions) @@ -195,7 +226,7 @@ def _did_change(self, doc: BaseDocument, changes: List[Interval]): for change in changes: paragraph = doc.paragraph_at_offset( change.start, - min_offset=change.start + change.length-1, + min_offset=change.start + change.length - 1, cleaned=True, ) if paragraph in checked: @@ -210,31 +241,22 @@ def _did_change(self, doc: BaseDocument, changes: List[Interval]): self.add_code_actions(doc, code_actions) def _handle_paragraph(self, doc: BaseDocument, paragraph: Interval): - if len(doc.text_at_offset(paragraph.start, paragraph.length, True).strip()) == 0: + if ( + len(doc.text_at_offset(paragraph.start, paragraph.length, True).strip()) + == 0 + ): return [], [] - pos_range = doc.range_at_offset( - paragraph.start, 
- paragraph.length, - True - ) + pos_range = doc.range_at_offset(paragraph.start, paragraph.length, True) self.remove_code_items_at_range(doc, pos_range) diags, actions = self._analyse( - doc.text_at_offset( - paragraph.start, - paragraph.length, - True - ), + doc.text_at_offset(paragraph.start, paragraph.length, True), doc, paragraph.start, ) - diagnostics = [ - diag - for diag in diags - if diag.range.start >= pos_range.start - ] + diagnostics = [diag for diag in diags if diag.range.start >= pos_range.start] code_actions = [ action for action in actions @@ -243,17 +265,11 @@ def _handle_paragraph(self, doc: BaseDocument, paragraph: Interval): return diagnostics, code_actions - def command_generate( - self, - uri: str, - prompt: str, - position: str, - new_line=True - ): + def command_generate(self, uri: str, prompt: str, position: str, new_line=True): with ProgressBar( - self.language_server, - f'{self.name} generating', - token=self._progressbar_token + self.language_server, + f"{self.name} generating", + token=self._progressbar_token, ): doc = self.get_document(uri) @@ -266,7 +282,7 @@ def command_generate( ) return - new_text += '\n' + new_text += "\n" position = Position(**eval(position)) range = Range( start=position, @@ -285,12 +301,11 @@ def command_generate( range=range, new_text=new_text, ), - - ] + ], ) ] ) - self.language_server.apply_edit(edit, 'textlsp.openai.generate') + self.language_server.apply_edit(edit, "textlsp.openai.generate") def get_code_actions(self, params: CodeActionParams) -> Optional[List[CodeAction]]: doc = self.get_document(params) @@ -299,18 +314,20 @@ def get_code_actions(self, params: CodeActionParams) -> Optional[List[CodeAction if len(doc.lines) > 0: line = doc.lines[params.range.start.line].strip() else: - line = '' - magic = self.config.get(self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC) + line = "" + magic = self.config.get( + self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC + ) if 
magic in line: if res is None: res = list() paragraph = doc.paragraph_at_position(params.range.start, False) - position = doc.position_at_offset(paragraph.start+paragraph.length, False) - position = str({'line': position.line, 'character': position.character}) + position = doc.position_at_offset(paragraph.start + paragraph.length, False) + position = str({"line": position.line, "character": position.character}) prompt = doc.text_at_offset(paragraph.start, paragraph.length, False) - prompt = prompt[prompt.find(magic)+len(magic):] - title = 'Prompt OpenAI' + prompt = prompt[prompt.find(magic) + len(magic) :] + title = "Prompt OpenAI" res.append( self.build_command_action( doc=doc, @@ -318,34 +335,40 @@ def get_code_actions(self, params: CodeActionParams) -> Optional[List[CodeAction command=Command( title=title, command=self.language_server.COMMAND_CUSTOM, - arguments=[{ - 'command': 'generate', - 'analyser': self.name, - 'uri': doc.uri, - 'prompt': prompt, - 'position': position, - 'new_line': True - }], + arguments=[ + { + "command": "generate", + "analyser": self.name, + "uri": doc.uri, + "prompt": prompt, + "position": position, + "new_line": True, + } + ], ), ) ) return res - def get_completions(self, params: Optional[CompletionParams] = None) -> Optional[CompletionList]: + def get_completions( + self, params: Optional[CompletionParams] = None + ) -> Optional[CompletionList]: if params.position == Position(line=0, character=0): return None doc = self.get_document(params) line = doc.lines[params.position.line] - magic = self.config.get(self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC) + magic = self.config.get( + self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC + ) - line_prefix = line[:params.position.character].strip() + line_prefix = line[: params.position.character].strip() if len(line_prefix) == 0 or line_prefix in magic: return [ CompletionItem( label=magic, - detail='OpenAI magic command for text generation based on' - 
' the prompt that follows.' + detail="OpenAI magic command for text generation based on" + " the prompt that follows.", ) ] diff --git a/textLSP/nn_utils.py b/textLSP/nn_utils.py index 3745f70..6a9bcca 100644 --- a/textLSP/nn_utils.py +++ b/textLSP/nn_utils.py @@ -1,7 +1,12 @@ -import torch - from textLSP.types import ConfigurationError +try: + import torch +except ModuleNotFoundError: + raise ConfigurationError( + "Dependencies are missing for torch-based modules. Please look at textLSP's" + " documentation for installing additional dependencies." + ) def get_device(use_gpu: bool): if isinstance(use_gpu, str):