Skip to content

Commit

Permalink
Merge pull request #18 from hangyav/feat/new_models
Browse files Browse the repository at this point in the history
New instruction tuned models
  • Loading branch information
hangyav authored Jan 18, 2024
2 parents 753edca + 1e340eb commit d89dfce
Show file tree
Hide file tree
Showing 9 changed files with 330 additions and 14 deletions.
46 changes: 42 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,31 @@ The following tools run on the local system:

```pip install git+https://github.com/PrithivirajDamodaran/Gramformer.git```
* hf_checker: Huggingface `text2text-generation` pipeline based analyser. See the [flan-t5-large-grammar-synthesis](https://huggingface.co/pszemraj/flan-t5-large-grammar-synthesis) model for an example.
<details><summary>Models</summary>
<ul>
<li>pszemraj/grammar-synthesis-small</li>
<li>pszemraj/grammar-synthesis-large</li>
<li>pszemraj/flan-t5-large-grammar-synthesis</li>
<li>pszemraj/flan-t5-xl-grammar-synthesis</li>
<li>pszemraj/bart-base-grammar-synthesis</li>
</ul>
</details>
* hf_instruction_checker: Huggingface `text2text-generation` pipeline based
analyser using instruction-tuned models. See Grammarly's
[CoEdIT](https://github.com/vipulraheja/coedit) model for an example. Supports
error checking and text generation, such as paraphrasing, through the `%HF%`
magic command (see the OpenAI analyser below).
<details><summary>Models</summary>
<ul>
<li>grammarly/coedit-large</li>
<li>grammarly/coedit-xl</li>
<li>grammarly/coedit-xl-composite</li>
<li>grammarly/coedit-xxl</li>
<li>jbochi/coedit-base</li>
<li>jbochi/coedit-small</li>
<li>jbochi/candle-coedit-quantized</li>
</ul>
</details>
* [hf_completion](https://huggingface.co/docs/transformers/task_summary#language-modeling): Huggingface `fill-mask` pipeline based text completion.

### Tools using remote services
Expand Down Expand Up @@ -94,9 +119,9 @@ ssh <server> textlsp
## Configuration

Using textLSP within an editor depends on the editor of choice.
For a few examples how to setup language servers in general in some of the popular editors see [here](https://github.com/openlawlibrary/pygls/tree/master/examples/hello-world#editor-configurations) or take a look at the related documentation of your editor.
For a few examples how to set up language servers in general in some of the popular editors see [here](https://github.com/openlawlibrary/pygls/tree/master/examples/hello-world#editor-configurations) or take a look at the related documentation of your editor.

By default all analyzers are disabled in textLSP, they have to be turned on in the settings.
By default, all analyzers are disabled in textLSP; they have to be turned on in the settings.
Example configuration in lua for nvim (other editors should be set up accordingly):

```lua
Expand All @@ -121,10 +146,22 @@ textLSP = {
}
},
hf_checker = {
enabled = true,
enabled = false,
gpu = false,
quantize=32,
model='pszemraj/flan-t5-large-grammar-synthesis',
-- model='pszemraj/grammar-synthesis-large',
min_length=40,
check_text = {
on_open = false,
on_save = true,
on_change = false,
}
},
hf_instruction_checker = {
enabled = true,
gpu = false,
quantize=32,
model='grammarly/coedit-large',
min_length=40,
check_text = {
on_open = false,
Expand All @@ -135,6 +172,7 @@ textLSP = {
hf_completion = {
enabled = true,
gpu = false,
quantize=32,
model='bert-base-multilingual-cased',
topk=5,
},
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def read(fname):
'openai==1.6.1',
'transformers==4.36.2',
'sortedcontainers==2.4.0',
'bitsandbytes==0.42.0',
],
extras_require={
'dev': [
Expand Down
44 changes: 44 additions & 0 deletions tests/analysers/hf_instruction_checker_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pytest

from textLSP.analysers.hf_instruction_checker import HFInstructionCheckerAnalyser
from textLSP.documents.document import BaseDocument


@pytest.fixture
def analyser():
    """Build an HFInstructionCheckerAnalyser wired to the CoEdIT model.

    No language server is needed for these unit tests, so ``None`` is
    passed in its place.
    """
    return HFInstructionCheckerAnalyser(
        None,
        {
            HFInstructionCheckerAnalyser.CONFIGURATION_MODEL: 'grammarly/coedit-large',
        },
        # Use the analyser's own name; 'hf_checker' was a copy-paste
        # leftover from the hf_checker test module.
        'hf_instruction_checker',
    )


@pytest.mark.parametrize('doc,exp', [
    (
        BaseDocument(
            'DUMMY_URL',
            'This is a short sentence.',
            version=1,
        ),
        False,
    ),
    (
        BaseDocument(
            'DUMMY_URL',
            'This is a long enough sentence with an eror or tvo.',
            version=1,
        ),
        True,
    ),
])
def test_simple(doc, exp, analyser):
    """Diagnostics and code actions are produced iff the text has errors."""
    diagnostics, actions = analyser._analyse_lines(doc.cleaned_source, doc)

    # Non-empty results are expected exactly when errors are present.
    assert (len(diagnostics) > 0) == exp
    assert (len(actions) > 0) == exp
34 changes: 29 additions & 5 deletions textLSP/analysers/hf_checker/hf_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Position,
TextEdit,
CodeAction,
MessageType,
)
from pygls.server import LanguageServer
from transformers import pipeline
Expand All @@ -18,8 +19,10 @@
Interval,
LINE_PATTERN,
TokenDiff,
ConfigurationError,
)
from ...documents.document import BaseDocument
from ... import nn_utils


logger = logging.getLogger(__name__)
Expand All @@ -29,19 +32,40 @@ class HFCheckerAnalyser(Analyser):
CONFIGURATION_GPU = 'gpu'
CONFIGURATION_MODEL = 'model'
CONFIGURATION_MIN_LENGTH = 'min_length'
CONFIGURATION_QUANTIZE = 'quantize'

SETTINGS_DEFAULT_GPU = False
SETTINGS_DEFAULT_MODEL = 'pszemraj/flan-t5-large-grammar-synthesis'
SETTINGS_DEFAULT_MIN_LENGTH = 40
SETTINGS_DEFAULT_MODEL = 'grammarly/coedit-large'
SETTINGS_DEFAULT_MIN_LENGTH = 0
SETTINGS_DEFAULT_QUANTIZE = 32

def __init__(self, language_server: LanguageServer, config: dict, name: str):
    """Set up the ``text2text-generation`` pipeline used for checking.

    Device (CPU/GPU), quantization and model are read from the analyser
    configuration. If the quantization setting is invalid for the chosen
    device, the user is notified via the language server and the analyser
    falls back to full precision (32 bit).
    """
    super().__init__(language_server, config, name)
    use_gpu = self.config.get(self.CONFIGURATION_GPU, self.SETTINGS_DEFAULT_GPU)
    device = nn_utils.get_device(use_gpu)

    # setdefault() keeps the effective value visible in the config.
    # NOTE: fixed misspelled local 'quanitze' -> 'quantize'.
    quantize = self.config.setdefault(self.CONFIGURATION_QUANTIZE, self.SETTINGS_DEFAULT_QUANTIZE)
    model_kwargs = dict()
    try:
        nn_utils.set_quantization_args(quantize, device, model_kwargs)
    except ConfigurationError as e:
        # Invalid quantization: report it and continue unquantized.
        language_server.show_message(
            f'{self.name}: {str(e)}',
            MessageType.Error,
        )
        self.config[self.CONFIGURATION_QUANTIZE] = 32

    model = self.config.get(self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL)
    self._corrector = pipeline(
        'text2text-generation',
        model,
        device=device,
        model_kwargs=model_kwargs,
    )

def corrector(self, text):
    """Run the wrapped Huggingface pipeline on *text* and return its output."""
    run_pipeline = self._corrector
    return run_pipeline(text)

def _analyse_lines(self, text, doc, offset=0) -> Tuple[List[Diagnostic], List[CodeAction]]:
diagnostics = list()
code_actions = list()
Expand Down
21 changes: 20 additions & 1 deletion textLSP/analysers/hf_completion/hf_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
CodeAction,
)
from pygls.server import LanguageServer
from lsprotocol.types import MessageType
from transformers import pipeline

from ..analyser import Analyser
from ...types import ConfigurationError
from ... import nn_utils


logger = logging.getLogger(__name__)
Expand All @@ -23,19 +25,36 @@ class HFCompletionAnalyser(Analyser):
CONFIGURATION_MODEL = 'model'
CONFIGURATION_TOP_K = 'topk'
CONFIGURATION_CONTEXT_SIZE = 'context_size'
CONFIGURATION_QUANTIZE = 'quantize'

SETTINGS_DEFAULT_GPU = False
SETTINGS_DEFAULT_MODEL = 'bert-base-multilingual-cased'
SETTINGS_DEFAULT_TOP_K = 5
SETTINGS_DEFAULT_CONTEXT_SIZE = 50
SETTINGS_DEFAULT_QUANTIZE = 32

def __init__(self, language_server: LanguageServer, config: dict, name: str):
super().__init__(language_server, config, name)
use_gpu = self.config.get(self.CONFIGURATION_GPU, self.SETTINGS_DEFAULT_GPU)
device = nn_utils.get_device(use_gpu)

quanitze = self.config.setdefault(self.CONFIGURATION_QUANTIZE, self.SETTINGS_DEFAULT_QUANTIZE)
model_kwargs = dict()
try:
nn_utils.set_quantization_args(quanitze, device, model_kwargs)
except ConfigurationError as e:
language_server.show_message(
f'{self.name}: {str(e)}',
MessageType.Error,
)
self.config[self.CONFIGURATION_QUANTIZE] = 32

model = self.config.get(self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL)
self.completor = pipeline(
'fill-mask',
model,
device='cuda:0' if self.config.get(self.CONFIGURATION_GPU, self.SETTINGS_DEFAULT_GPU) else 'cpu',
device=device,
model_kwargs=model_kwargs,
)
if self.completor.tokenizer.mask_token is None:
raise ConfigurationError(f'The tokenizer of {model} does not have a MASK token.')
Expand Down
1 change: 1 addition & 0 deletions textLSP/analysers/hf_instruction_checker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .hf_instruction_checker import HFInstructionCheckerAnalyser
Loading

0 comments on commit d89dfce

Please sign in to comment.