Skip to content

Commit

Permalink
feat: now ignores Go compile directives
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Mar 6, 2024
1 parent 9d09099 commit 66332dc
Show file tree
Hide file tree
Showing 11 changed files with 158 additions and 43 deletions.
1 change: 1 addition & 0 deletions harper-core/dictionary.dict
Original file line number Diff line number Diff line change
Expand Up @@ -30380,6 +30380,7 @@ lineage/MS
lineal/Y
lineament/SM
linear/Y
superlinear/Y
linearity/M
linebacker/MS
lined/U
Expand Down
4 changes: 4 additions & 0 deletions harper-core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ impl Document {
/// Build a [`Document`] from borrowed text.
///
/// Materializes the text's characters and delegates to
/// [`Self::new_from_vec`], which performs the actual parse.
pub fn new(text: &str, parser: Box<dyn Parser>) -> Self {
    Self::new_from_vec(text.chars().collect(), parser)
}

pub fn new_from_vec(source: Vec<char>, parser: Box<dyn Parser>) -> Self {
let mut doc = Self {
source,
tokens: Vec::new(),
Expand Down
2 changes: 2 additions & 0 deletions harper-core/src/linting/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ impl Matcher {
// This match list needs to be automatically expanded instead of explicitly
// defined like it is now.
let mut triggers = pt! {
"off","the","cuff" => "off-the-cuff",
"an","in" => "and in",
"repo" => "repository",
"repos" => "repositories",
"my","self" => "myself",
Expand Down
19 changes: 13 additions & 6 deletions harper-core/src/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,21 @@ impl Span {
self.start.max(other.start) <= self.end.min(other.end)
}

pub fn get_content<'a>(&self, source: &'a [char]) -> &'a [char] {
if cfg!(debug_assertions) {
assert!(self.start < self.end);
assert!(self.start < source.len());
assert!(self.end <= source.len());
pub fn is_valid() {}

/// Get the associated content. Will return [`None`] if any aspect is
/// invalid.
pub fn try_get_content<'a>(&self, source: &'a [char]) -> Option<&'a [char]> {
if (self.start > self.end) || (self.start >= source.len()) || (self.end > source.len()) {
return None;
}

&source[self.start..self.end]
Some(&source[self.start..self.end])
}

/// Get the associated content.
///
/// # Panics
///
/// Panics if any aspect of the span is invalid with respect to `source`
/// (see [`Self::try_get_content`] for the exact conditions).
pub fn get_content<'a>(&self, source: &'a [char]) -> &'a [char] {
    // `expect` states the invariant so an eventual panic points at the
    // real problem instead of a bare `Option::unwrap` message.
    self.try_get_content(source)
        .expect("span must be ordered and lie within the source buffer")
}

pub fn get_content_string(&self, source: &[char]) -> String {
Expand Down
6 changes: 3 additions & 3 deletions harper-ls/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,9 @@ impl Backend {
if let Some(ts_parser) =
TreeSitterParser::new_from_extension(&extension.to_string_lossy())
{
let doc = Document::new(text, Box::new(ts_parser.clone()));
let source: Vec<char> = text.chars().collect();

if let Some(new_dict) = ts_parser.create_ident_dict(doc.get_full_content()) {
if let Some(new_dict) = ts_parser.create_ident_dict(source.as_slice()) {
let new_dict = Arc::new(new_dict);

if doc_state.ident_dict != new_dict {
Expand All @@ -206,7 +206,7 @@ impl Backend {
}
}

doc
Document::new_from_vec(source, Box::new(ts_parser))
} else {
Document::new(text, Box::new(Markdown))
}
Expand Down
41 changes: 41 additions & 0 deletions harper-ls/src/comment_parsers/go.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
use harper_core::parsers::{Markdown, Parser};
use harper_core::Token;

use super::without_intiators;

/// A comment parser for Go source code.
///
/// Like [`super::Unit`], it strips comment initiators and parses the
/// remainder as Markdown, but it additionally skips compiler directives
/// (comment content beginning with `go:`, e.g. `//go:generate`).
#[derive(Debug, Clone, Copy)]
pub struct Go;

impl Parser for Go {
    /// Tokenize a single Go comment.
    ///
    /// Strips the comment initiators, ignores the directive word of
    /// compiler directives (content starting with `go:`), parses the rest
    /// as Markdown, and shifts each token's span back into the coordinate
    /// space of the full `source` slice.
    fn parse(&mut self, source: &[char]) -> Vec<Token> {
        let mut actual = without_intiators(source);
        let mut actual_source = actual.get_content(source);

        // BUGFIX: the directive prefix must be checked on the *stripped*
        // content — the raw source still begins with `//`, so matching on
        // `source` could never fire.
        if matches!(actual_source, ['g', 'o', ':', ..]) {
            // BUGFIX: search for the terminator in the stripped content,
            // not the raw source; a directive with no trailing text has
            // nothing to lint.
            let Some(terminator) = actual_source.iter().position(|c| c.is_whitespace()) else {
                return Vec::new();
            };

            // `actual_source` begins at `actual.start`, so an index into it
            // translates directly into an offset of the span's start.
            actual.start += terminator;

            // BUGFIX: re-slice from the original source — `actual` is
            // indexed into `source`, not into the already-narrowed slice.
            let Some(new_source) = actual.try_get_content(source) else {
                return Vec::new();
            };

            actual_source = new_source;
        }

        let mut markdown_parser = Markdown;

        let mut new_tokens = markdown_parser.parse(actual_source);

        // Re-anchor comment-relative spans to source-relative indices.
        new_tokens
            .iter_mut()
            .for_each(|t| t.span.offset(actual.start));

        new_tokens
    }
}

#[cfg(test)]
mod tests {}
30 changes: 30 additions & 0 deletions harper-ls/src/comment_parsers/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
mod go;
mod unit;

pub use go::Go;
use harper_core::Span;
pub use unit::Unit;

/// Get the span of a tree-sitter-produced comment that doesn't include the
/// comment openers and closers.
// NOTE(review): name is a typo for "initiators"; kept as-is because sibling
// modules call it by this spelling.
fn without_intiators(source: &[char]) -> Span {
    // Index of the first character that is not part of a comment opener.
    // Falls back to 0 when every character is an opener/closer character.
    let first_real = source
        .iter()
        .position(|c| !is_comment_character(*c))
        .unwrap_or(0);

    // Number of trailing closer characters, measured from the right.
    let trailing = source
        .iter()
        .rev()
        .position(|c| !is_comment_character(*c))
        .unwrap_or(0);

    Span::new(first_real, source.len() - trailing)
}

/// Whether `c` is one of the characters used to open or close comments
/// (`#`, `-`, `/`, `*`).
fn is_comment_character(c: char) -> bool {
    c == '#' || c == '-' || c == '/' || c == '*'
}
29 changes: 29 additions & 0 deletions harper-ls/src/comment_parsers/unit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use harper_core::parsers::{Markdown, Parser};
use harper_core::Token;

use super::without_intiators;

/// A comment parser that strips starting `/` and `*` characters.
///
/// It is meant to cover _most_ cases in _most_ programming languages.
///
/// It assumes it is being provided a single line of comment at a time,
/// including the comment initiation characters.
// Derives added for consistency with the sibling `Go` parser; the type is
// a stateless unit struct, so `Copy` is free.
#[derive(Debug, Clone, Copy)]
pub struct Unit;

impl Parser for Unit {
    /// Tokenize one line of comment: drop the opener/closer characters,
    /// parse the payload as Markdown, and re-anchor the resulting token
    /// spans to the coordinates of the full `source` slice.
    fn parse(&mut self, source: &[char]) -> Vec<Token> {
        // Narrow to the comment's payload.
        let content_span = without_intiators(source);
        let content = content_span.get_content(source);

        let mut markdown_parser = Markdown;
        let mut tokens = markdown_parser.parse(content);

        // Spans come back relative to `content`; shift them so they index
        // into the original `source`.
        for token in &mut tokens {
            token.span.offset(content_span.start);
        }

        tokens
    }
}
1 change: 1 addition & 0 deletions harper-ls/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::io::stderr;
use config::Config;
use tokio::net::TcpListener;
mod backend;
mod comment_parsers;
mod config;
mod diagnostics;
mod dictionary_io;
Expand Down
66 changes: 33 additions & 33 deletions harper-ls/src/tree_sitter_parser.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
use std::collections::HashSet;

use harper_core::parsers::{Markdown, Parser};
use harper_core::{FullDictionary, Span};
use harper_core::parsers::Parser;
use harper_core::{FullDictionary, Span, Token};
use tree_sitter::{Language, Node, Tree, TreeCursor};

/// A Harper parser that wraps the standard [`Markdown`] parser that exclusively
/// parses comments in any language supported by [`tree_sitter`].
#[derive(Debug, Clone)]
use super::comment_parsers::{Go, Unit};

/// A Harper parser that wraps various [`super::comment_parsers`] that
/// exclusively parses comments in any language supported by [`tree_sitter`].
pub struct TreeSitterParser {
language: Language
language: Language,
comment_parser: Box<dyn Parser>
}

impl TreeSitterParser {
Expand All @@ -32,7 +34,15 @@ impl TreeSitterParser {
_ => return None
};

Some(Self { language })
let comment_parser: Box<dyn Parser> = match file_extension {
"go" => Box::new(Go),
_ => Box::new(Unit)
};

Some(Self {
language,
comment_parser
})
}

fn parse_root(&self, text: &str) -> Option<Tree> {
Expand Down Expand Up @@ -88,61 +98,55 @@ impl TreeSitterParser {
return;
}

while cursor.goto_next_sibling() {
loop {
let node = cursor.node();

visit(&node);

Self::visit_nodes(cursor, visit);

if !cursor.goto_next_sibling() {
break;
}
}

cursor.goto_parent();
}
}

impl Parser for TreeSitterParser {
fn parse(&mut self, source: &[char]) -> Vec<harper_core::Token> {
fn parse(&mut self, source: &[char]) -> Vec<Token> {
let text: String = source.iter().collect();

let mut markdown_parser = Markdown;

let Some(root) = self.parse_root(&text) else {
return vec![];
};

let mut comments_spans = Vec::new();

Self::extract_comments(&mut root.walk(), &mut comments_spans);

dbg!(&comments_spans.len());
byte_spans_to_char_spans(&mut comments_spans, &text);
dbg!(&comments_spans.len());

let mut tokens = Vec::new();

for (s_index, span) in comments_spans.iter().enumerate() {
// Skip over the comment start characters
let actual_start = source[span.start..span.end]
.iter()
.position(|c| !is_comment_character(*c))
.unwrap_or(0)
+ span.start;

if span.end <= actual_start {
continue;
}
let mut new_tokens = self.comment_parser.parse(span.get_content(source));

let mut new_tokens = markdown_parser.parse(&source[actual_start..span.end]);
new_tokens
.iter_mut()
.for_each(|v| v.span.offset(span.start));

// The markdown parser will insert a newline at end-of-input.
// The comment parser will insert a newline at end-of-input.
// If the next treesitter chunk is a comment, we want to remove that.
if let Some(next_start) = comments_spans.get(s_index + 1).map(|v| v.start) {
if is_span_whitespace(Span::new(span.end, next_start), source) {
new_tokens.pop();
}
}

new_tokens
.iter_mut()
.for_each(|t| t.span.offset(actual_start));

tokens.append(&mut new_tokens);
}

Expand All @@ -159,10 +163,6 @@ fn is_span_whitespace(span: Span, source: &[char]) -> bool {
== 0
}

fn is_comment_character(c: char) -> bool {
matches!(c, '#' | '-' | '/')
}

/// Converts a set of byte-indexed [`Span`]s to char-index Spans, in-place.
/// NOTE: Will sort the given slice by their [`Span::start`].
///
Expand All @@ -172,10 +172,10 @@ fn byte_spans_to_char_spans(byte_spans: &mut Vec<Span>, source: &str) {

let cloned = byte_spans.clone();

let mut i = 0;
let mut i: usize = 0;
byte_spans.retain(|cur| {
i += 1;
if let Some(prev) = cloned.get(i - 2) {
if let Some(prev) = cloned.get(i.wrapping_sub(2)) {
!cur.overlaps_with(*prev)
} else {
true
Expand Down
2 changes: 1 addition & 1 deletion web/src/lib/Underlines.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
.toSorted(([a], [b]) => a.span.start - b.span.end))
);
$: if (focusLintIndex != null && lintHighlights[focusLintIndex] != null)
lintHighlights[focusLintIndex].scrollIntoView({ behavior: 'smooth' });
lintHighlights[focusLintIndex].scrollIntoView({ behavior: 'smooth', block: 'center' });
function reOrgString(text: string): (string | undefined)[] {
if (text.trim().length == 0) {
Expand Down

0 comments on commit 66332dc

Please sign in to comment.