Skip to content

Commit

Permalink
feat: now ignores Go compile directives
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Mar 6, 2024
1 parent 9d09099 commit 66332dc
Show file tree
Hide file tree
Showing 11 changed files with 158 additions and 43 deletions.
1 change: 1 addition & 0 deletions harper-core/dictionary.dict
Original file line number Diff line number Diff line change
Expand Up @@ -30380,6 +30380,7 @@ lineage/MS
lineal/Y
lineament/SM
linear/Y
superlinear/Y
linearity/M
linebacker/MS
lined/U
Expand Down
4 changes: 4 additions & 0 deletions harper-core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ impl Document {
/// Build a [`Document`] from borrowed text.
///
/// Materializes the text's characters and delegates to
/// [`Self::new_from_vec`], which performs the actual parse.
pub fn new(text: &str, parser: Box<dyn Parser>) -> Self {
    Self::new_from_vec(text.chars().collect(), parser)
}

pub fn new_from_vec(source: Vec<char>, parser: Box<dyn Parser>) -> Self {
let mut doc = Self {
source,
tokens: Vec::new(),
Expand Down
2 changes: 2 additions & 0 deletions harper-core/src/linting/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ impl Matcher {
// This match list needs to be automatically expanded instead of explicitly
// defined like it is now.
let mut triggers = pt! {
"off","the","cuff" => "off-the-cuff",
"an","in" => "and in",
"repo" => "repository",
"repos" => "repositories",
"my","self" => "myself",
Expand Down
19 changes: 13 additions & 6 deletions harper-core/src/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,21 @@ impl Span {
self.start.max(other.start) <= self.end.min(other.end)
}

pub fn get_content<'a>(&self, source: &'a [char]) -> &'a [char] {
if cfg!(debug_assertions) {
assert!(self.start < self.end);
assert!(self.start < source.len());
assert!(self.end <= source.len());
pub fn is_valid() {}

/// Get the associated content. Will return [`None`] if any aspect is
/// invalid.
pub fn try_get_content<'a>(&self, source: &'a [char]) -> Option<&'a [char]> {
if (self.start > self.end) || (self.start >= source.len()) || (self.end > source.len()) {
return None;
}

&source[self.start..self.end]
Some(&source[self.start..self.end])
}

/// Get the associated content.
///
/// # Panics
///
/// Panics if any aspect of the span is invalid with respect to `source`
/// (see [`Self::try_get_content`] for the exact conditions).
pub fn get_content<'a>(&self, source: &'a [char]) -> &'a [char] {
    // `expect` states the invariant so an eventual panic points at the
    // real problem instead of a bare `Option::unwrap` message.
    self.try_get_content(source)
        .expect("span must be ordered and lie within the source buffer")
}

pub fn get_content_string(&self, source: &[char]) -> String {
Expand Down
6 changes: 3 additions & 3 deletions harper-ls/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,9 @@ impl Backend {
if let Some(ts_parser) =
TreeSitterParser::new_from_extension(&extension.to_string_lossy())
{
let doc = Document::new(text, Box::new(ts_parser.clone()));
let source: Vec<char> = text.chars().collect();

if let Some(new_dict) = ts_parser.create_ident_dict(doc.get_full_content()) {
if let Some(new_dict) = ts_parser.create_ident_dict(source.as_slice()) {
let new_dict = Arc::new(new_dict);

if doc_state.ident_dict != new_dict {
Expand All @@ -206,7 +206,7 @@ impl Backend {
}
}

doc
Document::new_from_vec(source, Box::new(ts_parser))
} else {
Document::new(text, Box::new(Markdown))
}
Expand Down
41 changes: 41 additions & 0 deletions harper-ls/src/comment_parsers/go.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
use harper_core::parsers::{Markdown, Parser};
use harper_core::Token;

use super::without_intiators;

/// A comment parser for Go source code.
///
/// Like [`super::Unit`], it strips comment initiators and parses the
/// remainder as Markdown, but it additionally skips compiler directives
/// (comment content beginning with `go:`, e.g. `//go:generate`).
#[derive(Debug, Clone, Copy)]
pub struct Go;

impl Parser for Go {
    /// Tokenize a single Go comment.
    ///
    /// Strips the comment initiators, ignores the directive word of
    /// compiler directives (content starting with `go:`), parses the rest
    /// as Markdown, and shifts each token's span back into the coordinate
    /// space of the full `source` slice.
    fn parse(&mut self, source: &[char]) -> Vec<Token> {
        let mut actual = without_intiators(source);
        let mut actual_source = actual.get_content(source);

        // BUGFIX: the directive prefix must be checked on the *stripped*
        // content — the raw source still begins with `//`, so matching on
        // `source` could never fire.
        if matches!(actual_source, ['g', 'o', ':', ..]) {
            // BUGFIX: search for the terminator in the stripped content,
            // not the raw source; a directive with no trailing text has
            // nothing to lint.
            let Some(terminator) = actual_source.iter().position(|c| c.is_whitespace()) else {
                return Vec::new();
            };

            // `actual_source` begins at `actual.start`, so an index into it
            // translates directly into an offset of the span's start.
            actual.start += terminator;

            // BUGFIX: re-slice from the original source — `actual` is
            // indexed into `source`, not into the already-narrowed slice.
            let Some(new_source) = actual.try_get_content(source) else {
                return Vec::new();
            };

            actual_source = new_source;
        }

        let mut markdown_parser = Markdown;

        let mut new_tokens = markdown_parser.parse(actual_source);

        // Re-anchor comment-relative spans to source-relative indices.
        new_tokens
            .iter_mut()
            .for_each(|t| t.span.offset(actual.start));

        new_tokens
    }
}

#[cfg(test)]
mod tests {}
30 changes: 30 additions & 0 deletions harper-ls/src/comment_parsers/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
mod go;
mod unit;

pub use go::Go;
use harper_core::Span;
pub use unit::Unit;

/// Get the span of a tree-sitter-produced comment that doesn't include the
/// comment openers and closers.
// NOTE(review): name is a typo for "initiators"; kept as-is because sibling
// modules call it by this spelling.
fn without_intiators(source: &[char]) -> Span {
    // Index of the first character that is not part of a comment opener.
    // Falls back to 0 when every character is an opener/closer character.
    let first_real = source
        .iter()
        .position(|c| !is_comment_character(*c))
        .unwrap_or(0);

    // Number of trailing closer characters, measured from the right.
    let trailing = source
        .iter()
        .rev()
        .position(|c| !is_comment_character(*c))
        .unwrap_or(0);

    Span::new(first_real, source.len() - trailing)
}

/// Whether `c` is one of the characters used to open or close comments
/// (`#`, `-`, `/`, `*`).
fn is_comment_character(c: char) -> bool {
    c == '#' || c == '-' || c == '/' || c == '*'
}
29 changes: 29 additions & 0 deletions harper-ls/src/comment_parsers/unit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use harper_core::parsers::{Markdown, Parser};
use harper_core::Token;

use super::without_intiators;

/// A comment parser that strips starting `/` and `*` characters.
///
/// It is meant to cover _most_ cases in _most_ programming languages.
///
/// It assumes it is being provided a single line of comment at a time,
/// including the comment initiation characters.
// Derives added for consistency with the sibling `Go` parser; the type is
// a stateless unit struct, so `Copy` is free.
#[derive(Debug, Clone, Copy)]
pub struct Unit;

impl Parser for Unit {
    /// Tokenize one line of comment: drop the opener/closer characters,
    /// parse the payload as Markdown, and re-anchor the resulting token
    /// spans to the coordinates of the full `source` slice.
    fn parse(&mut self, source: &[char]) -> Vec<Token> {
        // Narrow to the comment's payload.
        let content_span = without_intiators(source);
        let content = content_span.get_content(source);

        let mut markdown_parser = Markdown;
        let mut tokens = markdown_parser.parse(content);

        // Spans come back relative to `content`; shift them so they index
        // into the original `source`.
        for token in &mut tokens {
            token.span.offset(content_span.start);
        }

        tokens
    }
}
1 change: 1 addition & 0 deletions harper-ls/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::io::stderr;
use config::Config;
use tokio::net::TcpListener;
mod backend;
mod comment_parsers;
mod config;
mod diagnostics;
mod dictionary_io;
Expand Down
66 changes: 33 additions & 33 deletions harper-ls/src/tree_sitter_parser.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
use std::collections::HashSet;

use harper_core::parsers::{Markdown, Parser};
use harper_core::{FullDictionary, Span};
use harper_core::parsers::Parser;
use harper_core::{FullDictionary, Span, Token};
use tree_sitter::{Language, Node, Tree, TreeCursor};

/// A Harper parser that wraps the standard [`Markdown`] parser that exclusively
/// parses comments in any language supported by [`tree_sitter`].
#[derive(Debug, Clone)]
use super::comment_parsers::{Go, Unit};

/// A Harper parser that wraps various [`super::comment_parsers`] that
/// exclusively parses comments in any language supported by [`tree_sitter`].
pub struct TreeSitterParser {
language: Language
language: Language,
comment_parser: Box<dyn Parser>
}

impl TreeSitterParser {
Expand All @@ -32,7 +34,15 @@ impl TreeSitterParser {
_ => return None
};

Some(Self { language })
let comment_parser: Box<dyn Parser> = match file_extension {
"go" => Box::new(Go),
_ => Box::new(Unit)
};

Some(Self {
language,
comment_parser
})
}

fn parse_root(&self, text: &str) -> Option<Tree> {
Expand Down Expand Up @@ -88,61 +98,55 @@ impl TreeSitterParser {
return;
}

while cursor.goto_next_sibling() {
loop {
let node = cursor.node();

visit(&node);

Self::visit_nodes(cursor, visit);

if !cursor.goto_next_sibling() {
break;
}
}

cursor.goto_parent();
}
}

impl Parser for TreeSitterParser {
fn parse(&mut self, source: &[char]) -> Vec<harper_core::Token> {
fn parse(&mut self, source: &[char]) -> Vec<Token> {
let text: String = source.iter().collect();

let mut markdown_parser = Markdown;

let Some(root) = self.parse_root(&text) else {
return vec![];
};

let mut comments_spans = Vec::new();

Self::extract_comments(&mut root.walk(), &mut comments_spans);

dbg!(&comments_spans.len());
byte_spans_to_char_spans(&mut comments_spans, &text);
dbg!(&comments_spans.len());

let mut tokens = Vec::new();

for (s_index, span) in comments_spans.iter().enumerate() {
// Skip over the comment start characters
let actual_start = source[span.start..span.end]
.iter()
.position(|c| !is_comment_character(*c))
.unwrap_or(0)
+ span.start;

if span.end <= actual_start {
continue;
}
let mut new_tokens = self.comment_parser.parse(span.get_content(source));

let mut new_tokens = markdown_parser.parse(&source[actual_start..span.end]);
new_tokens
.iter_mut()
.for_each(|v| v.span.offset(span.start));

// The markdown parser will insert a newline at end-of-input.
// The comment parser will insert a newline at end-of-input.
// If the next treesitter chunk is a comment, we want to remove that.
if let Some(next_start) = comments_spans.get(s_index + 1).map(|v| v.start) {
if is_span_whitespace(Span::new(span.end, next_start), source) {
new_tokens.pop();
}
}

new_tokens
.iter_mut()
.for_each(|t| t.span.offset(actual_start));

tokens.append(&mut new_tokens);
}

Expand All @@ -159,10 +163,6 @@ fn is_span_whitespace(span: Span, source: &[char]) -> bool {
== 0
}

fn is_comment_character(c: char) -> bool {
matches!(c, '#' | '-' | '/')
}

/// Converts a set of byte-indexed [`Span`]s to char-index Spans, in-place.
/// NOTE: Will sort the given slice by their [`Span::start`].
///
Expand All @@ -172,10 +172,10 @@ fn byte_spans_to_char_spans(byte_spans: &mut Vec<Span>, source: &str) {

let cloned = byte_spans.clone();

let mut i = 0;
let mut i: usize = 0;
byte_spans.retain(|cur| {
i += 1;
if let Some(prev) = cloned.get(i - 2) {
if let Some(prev) = cloned.get(i.wrapping_sub(2)) {
!cur.overlaps_with(*prev)
} else {
true
Expand Down
2 changes: 1 addition & 1 deletion web/src/lib/Underlines.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
.toSorted(([a], [b]) => a.span.start - b.span.end))
);
$: if (focusLintIndex != null && lintHighlights[focusLintIndex] != null)
lintHighlights[focusLintIndex].scrollIntoView({ behavior: 'smooth' });
lintHighlights[focusLintIndex].scrollIntoView({ behavior: 'smooth', block: 'center' });
function reOrgString(text: string): (string | undefined)[] {
if (text.trim().length == 0) {
Expand Down

0 comments on commit 66332dc

Please sign in to comment.