Skip to content

Commit a6c464f

Browse files
authored
Merge pull request #3798 from Hannah-Sten/grazie
Improve Grazie implementation
2 parents 052a869 + bfdba9e commit a6c464f

File tree

8 files changed

+140
-63
lines changed

8 files changed

+140
-63
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
### Added
66

77
### Fixed
8+
* Fix various issues with the Grazie implementation, in particular default rules for Grazie Pro
89

910
## [0.9.10-alpha.2] - 2024-12-05
1011

build.gradle.kts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ dependencies {
113113
bundledPlugin("tanvd.grazi")
114114
plugin("com.firsttimeinforever.intellij.pdf.viewer.intellij-pdf-viewer:0.17.0")
115115
plugin("com.jetbrains.hackathon.indices.viewer:1.28")
116+
// Does not work in tests: https://youtrack.jetbrains.com/issue/GRZ-5023
117+
// plugin("com.intellij.grazie.pro:0.3.347")
116118
}
117119

118120
// Local dependencies

src/nl/hannahsten/texifyidea/inspections/grazie/LatexTextExtractor.kt

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,16 @@ package nl.hannahsten.texifyidea.inspections.grazie
33
import com.intellij.grazie.grammar.strategy.StrategyUtils
44
import com.intellij.grazie.text.TextContent
55
import com.intellij.grazie.text.TextExtractor
6+
import com.intellij.lang.tree.util.children
67
import com.intellij.psi.PsiElement
8+
import com.intellij.psi.PsiWhiteSpace
79
import com.intellij.psi.util.startOffset
810
import nl.hannahsten.texifyidea.lang.commands.Argument
911
import nl.hannahsten.texifyidea.lang.commands.LatexCommand
1012
import nl.hannahsten.texifyidea.psi.*
1113
import nl.hannahsten.texifyidea.util.merge
1214
import nl.hannahsten.texifyidea.util.overlaps
13-
import nl.hannahsten.texifyidea.util.parser.childrenOfType
14-
import nl.hannahsten.texifyidea.util.parser.endOffset
15-
import nl.hannahsten.texifyidea.util.parser.firstParentOfType
16-
import nl.hannahsten.texifyidea.util.parser.parents
15+
import nl.hannahsten.texifyidea.util.parser.*
1716
import nl.hannahsten.texifyidea.util.toTextRange
1817

1918
/**
@@ -27,6 +26,10 @@ class LatexTextExtractor : TextExtractor() {
2726
return null
2827
}
2928

29+
return buildTextContent(root)
30+
}
31+
32+
fun buildTextContent(root: LatexContent): TextContent? {
3033
// Since Grazie works by first checking leaf elements, and if it gets null tries one level higher, we cannot return anything (e.g. literal for a command, comment for comments) other than LatexContent because then LatexContent itself will not be used as a root.
3134
// However, we do need it as a root because we need to filter out certain things like inline math ourselves, so that we can make sure all the whitespace around ignored items is correct.
3235
val domain = TextContent.TextDomain.PLAIN_TEXT
@@ -37,29 +40,38 @@ class LatexTextExtractor : TextExtractor() {
3740
.map { TextContent.Exclusion.exclude(it.toTextRange()) }
3841
.filter { it.start >= 0 && it.end <= textContent.length }
3942

40-
return textContent.excludeRanges(stealthyRanges)
43+
val textToSubmit = textContent.excludeRanges(stealthyRanges)
44+
return textToSubmit
4145
}
4246

4347
/**
4448
* Get ranges to ignore.
4549
* Note: IntRange has an inclusive end.
4650
*/
47-
private fun getStealthyRanges(root: PsiElement): List<IntRange> {
51+
fun getStealthyRanges(root: PsiElement): List<IntRange> {
4852
// Getting text takes time, so we only do it once
4953
val rootText = root.text
5054

5155
// Only keep normaltext, assuming other things (like inline math) need to be ignored.
52-
val ranges = (root.childrenOfType(LatexNormalText::class) + root.childrenOfType<LatexParameterText>())
56+
val ranges = (root.childrenOfType(LatexNormalText::class) + root.childrenOfType<LatexParameterText>() + root.childrenOfType<PsiWhiteSpace>())
5357
.asSequence()
54-
.filter { it.isNotInMathEnvironment() && it.isNotInSquareBrackets() }
58+
.filter { !it.inMathContext() && it.isNotInSquareBrackets() }
59+
// Ordering is relevant for whitespace
60+
.sortedBy { it.startOffset }
61+
// Always keep newlines, as they may be the only whitespace splitting consecutive commands
62+
.filter { text -> text !is PsiWhiteSpace || text.text.contains("\n") }
63+
// Skip arguments of non-text commands, but keep arguments of unknown commands, in particular if they are in the middle of a sentence
64+
// Even commands which have no text as argument, for example certain reference commands like autoref, may need to be kept in to get correct punctuation
65+
.filterNot { text -> text is LatexParameterText && LatexCommand.lookup(text.firstParentOfType(LatexCommands::class)?.name)?.firstOrNull()?.arguments?.any { it.type != Argument.Type.TEXT && it.type != Argument.Type.LABEL } == true }
66+
// Environment names are never part of a sentence
67+
.filterNot { text -> text.firstParentOfType<LatexBeginCommand>() != null || text.firstParentOfType<LatexEndCommand>() != null }
68+
// If we encounter an unescaped &, we are in some language construct like a tabular, so we ignore this because often a tabular does not contain full sentences
69+
.filter { text -> text.node.children().none { it.elementType == LatexTypes.AMPERSAND } }
70+
// NOTE: it is not allowed to start the text we send to Grazie with a newline! If we do, then Grazie will just not do anything. So we exclude whitespace at the start
71+
.dropWhile { it is PsiWhiteSpace }
5572
// Ranges that we need to keep
5673
// Note that textRangeInParent will not be correct because that's the text range in the direct parent, not in the root
5774
.flatMap { text ->
58-
// Skip arguments of non-text commands
59-
if (text is LatexParameterText && LatexCommand.lookup(text.firstParentOfType(LatexCommands::class)?.name)?.firstOrNull()?.arguments?.any { it.type == Argument.Type.TEXT } != true) {
60-
return@flatMap emptyList()
61-
}
62-
6375
var start = text.textRange.startOffset - root.startOffset
6476
// If LatexNormalText starts after a newline following a command, the newline is not part of the LatexNormalText so we include it manually to make sure that it is seen as a space between sentences
6577
// NOTE: it is not allowed to start the text we send to Grazie with a newline! If we do, then Grazie will just not do anything. So we exclude the newline for the first normal text in the file.
@@ -71,7 +83,7 @@ class LatexTextExtractor : TextExtractor() {
7183

7284
// -1 Because endOffset is exclusive, but we are working with inclusive end here
7385
var end = text.textRange.endOffset - 1 - root.startOffset
74-
// If LatexNormalText ends, for example because it is followed by a command, we do want to include the space in front of the command, since it is still typeset as a space, which is not true for the space after the command
86+
// If LatexNormalText ends, for example because it is followed by a command, we do want to include the space in front of the command, since it is still typeset as a space, which is not true for the space after the command if the command has no arguments,
7587
// except when the space is followed by inline math, since we ignore inline math altogether (which is probably not correct) we should also ignore the space
7688
if (setOf(' ', '\n').contains(rootText.getOrNull(end + 1)) && rootText.getOrNull(end + 2) != '$') {
7789
end += 1
@@ -100,14 +112,10 @@ class LatexTextExtractor : TextExtractor() {
100112
ranges.removeAll(overlapped.toSet())
101113
ranges.add(indent.merge(overlapped))
102114
}
103-
// This is approximately (except at the start) the text we send to Grazie
104-
// val text = ranges.sortedBy { it.first }.flatMap { listOf(it.first, it.last) }.toMutableList().also { it.add(0, -1) }
105-
// .chunked(2) { if (it.size > 1) rootText.substring(it[0] + 1, it[1]) else null }
115+
106116
return ranges.sortedBy { it.first }
107117
}
108118

109-
private fun PsiElement.isNotInMathEnvironment() = parents().none { it is LatexMathEnvironment }
110-
111119
private fun PsiElement.isNotInSquareBrackets() = parents().find { it is LatexGroup || it is LatexOptionalParam }
112120
?.let { it is LatexGroup } ?: true
113121
}

src/nl/hannahsten/texifyidea/lang/commands/LatexGenericRegularCommand.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ enum class LatexGenericRegularCommand(
154154
INDEXSPACE("indexspace"),
155155
INDEX("intex", "entry".asRequired()),
156156
IT("it"),
157-
ITEM("item", "label".asOptional()),
157+
ITEM("item", "label".asOptional(Argument.Type.TEXT)),
158158
ITSHAPE("itshape"),
159159
LABEL("label", "key".asRequired()),
160160
LARGE("large"),

src/nl/hannahsten/texifyidea/lang/commands/LatexGlossariesCommand.kt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ import com.intellij.psi.PsiElement
44
import nl.hannahsten.texifyidea.lang.LatexPackage
55
import nl.hannahsten.texifyidea.psi.LatexCommands
66
import nl.hannahsten.texifyidea.psi.LatexParameterText
7-
import nl.hannahsten.texifyidea.util.parser.firstChildOfType
87
import nl.hannahsten.texifyidea.util.magic.CommandMagic
8+
import nl.hannahsten.texifyidea.util.parser.firstChildOfType
99
import nl.hannahsten.texifyidea.util.parser.requiredParameters
1010

1111
enum class LatexGlossariesCommand(
@@ -46,10 +46,10 @@ enum class LatexGlossariesCommand(
4646
"long".asRequired(),
4747
dependency = LatexPackage.GLOSSARIES
4848
),
49-
GLS("gls", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
50-
GLSUPPER("Gls", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
51-
GLSPLURAL("glspl", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
52-
GLSPLURALUPPER("Glspl", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
49+
GLS("gls", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
50+
GLSUPPER("Gls", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
51+
GLSPLURAL("glspl", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
52+
GLSPLURALUPPER("Glspl", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
5353

5454
LOADGLSENTRIES(
5555
"loadglsentries",

test/nl/hannahsten/texifyidea/inspections/grazie/GrazieInspectionTest.kt

Lines changed: 104 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@ import com.intellij.grazie.ide.msg.GrazieStateLifecycle
66
import com.intellij.grazie.jlanguage.Lang
77
import com.intellij.grazie.remote.GrazieRemote
88
import com.intellij.openapi.application.ApplicationManager
9+
import com.intellij.psi.PsiFile
910
import com.intellij.spellchecker.inspections.SpellCheckingInspection
1011
import com.intellij.testFramework.fixtures.BasePlatformTestCase
1112
import com.intellij.testFramework.fixtures.impl.CodeInsightTestFixtureImpl
1213
import com.intellij.util.messages.Topic
1314
import nl.hannahsten.texifyidea.file.LatexFileType
15+
import nl.hannahsten.texifyidea.psi.LatexContent
16+
import nl.hannahsten.texifyidea.util.parser.firstChildOfType
1417

1518
class GrazieInspectionTest : BasePlatformTestCase() {
1619

@@ -28,24 +31,40 @@ class GrazieInspectionTest : BasePlatformTestCase() {
2831
}
2932
}
3033

31-
fun testCheckGrammarInConstructs() {
34+
fun testSingleSentence() {
3235
myFixture.configureByText(LatexFileType, """Is these an error with a sentence ${'$'}\xi${'$'} end or not.""")
3336
myFixture.checkHighlighting()
34-
val testName = getTestName(false)
35-
myFixture.configureByFile("$testName.tex")
36-
myFixture.checkHighlighting(true, false, false, true)
3737
}
3838

39-
fun testMultilineCheckGrammar() {
40-
val testName = getTestName(false)
41-
myFixture.configureByFile("$testName.tex")
39+
fun testCommentInText() {
40+
myFixture.configureByText(
41+
LatexFileType,
42+
"""
43+
\begin{document}
44+
All <GRAMMAR_ERROR descr="The verb 'is' is singular. Did you mean: this is or those are?">those is</GRAMMAR_ERROR> problems in the middle of a sentence.
45+
% <GRAMMAR_ERROR descr="The verb 'is' is singular. Did you mean: this is or Those are?">Those is</GRAMMAR_ERROR> a problem in a comment
46+
<GRAMMAR_ERROR descr="The verb 'is' is singular. Did you mean: this is or Those are?">Those is</GRAMMAR_ERROR> a problem at the beginning of a sentence.
47+
\end{document}
48+
""".trimIndent()
49+
)
4250
myFixture.checkHighlighting(true, false, false, true)
4351
}
4452

45-
fun testInlineMath() {
53+
fun testSentenceAtEnvironmentStart() {
4654
myFixture.configureByText(
47-
LatexFileType, """Does Grazie detect ${'$'}m$ as a sentence?"""
55+
LatexFileType,
56+
"""
57+
\begin{document}
58+
<GRAMMAR_ERROR descr="Use An instead of 'A' if the following word starts with a vowel sound, e.g. 'an article', 'an hour'.">A</GRAMMAR_ERROR> apple a day keeps the doctor away.
59+
Some other sentence.
60+
\end{document}
61+
""".trimIndent()
4862
)
63+
myFixture.checkHighlighting(true, false, false, true)
64+
}
65+
66+
fun testInlineMath() {
67+
myFixture.configureByText(LatexFileType, """Does Grazie detect ${'$'}m$ as a sentence?""")
4968
myFixture.checkHighlighting()
5069
}
5170

@@ -61,9 +80,7 @@ class GrazieInspectionTest : BasePlatformTestCase() {
6180
}
6281

6382
fun testMatchingParens() {
64-
myFixture.configureByText(
65-
LatexFileType, """a (in this case) . aa"""
66-
)
83+
myFixture.configureByText(LatexFileType, """A sentence (in this case). More sentence.""")
6784
myFixture.checkHighlighting()
6885
}
6986

@@ -122,21 +139,79 @@ class GrazieInspectionTest : BasePlatformTestCase() {
122139
myFixture.checkHighlighting()
123140
}
124141

125-
// Broken in 2023.2 (TEX-177)
126-
// fun testTabular() {
127-
// GrazieRemote.download(Lang.GERMANY_GERMAN)
128-
// GrazieConfig.update { it.copy(enabledLanguages = it.enabledLanguages + Lang.GERMANY_GERMAN) }
129-
// myFixture.configureByText(
130-
// LatexFileType,
131-
// """
132-
// \begin{tabular}{llll}
133-
// ${'$'}a${'$'}: & ${'$'}\mathbb{N}${'$'} & \rightarrow & ${'$'}M${'$'} \\
134-
// \multicolumn{1}{l}{} & ${'$'}n${'$'} & \mapsto & ${'$'}a(n)${'$'}.
135-
// \end{tabular}
136-
//
137-
// Ich bin über die Entwicklung sehr froh.
138-
// """.trimIndent()
139-
// )
140-
// myFixture.checkHighlighting()
141-
// }
142+
fun testGermanGlossaries() {
143+
GrazieRemote.download(Lang.GERMANY_GERMAN)
144+
GrazieConfig.update { it.copy(enabledLanguages = it.enabledLanguages + Lang.GERMANY_GERMAN) }
145+
myFixture.configureByText(
146+
LatexFileType,
147+
"""
148+
Der Hintergrund des Themas der Thesis ist der Umbruch beim Prozess des \gls{api}-Managements.
149+
""".trimIndent()
150+
)
151+
myFixture.checkHighlighting()
152+
}
153+
154+
fun testTabular() {
155+
GrazieRemote.download(Lang.GERMANY_GERMAN)
156+
GrazieConfig.update { it.copy(enabledLanguages = it.enabledLanguages + Lang.GERMANY_GERMAN) }
157+
myFixture.configureByText(
158+
LatexFileType,
159+
"""
160+
\begin{tabular}{llll}
161+
${'$'}a${'$'}: & ${'$'}\mathbb{N}${'$'} & \rightarrow & ${'$'}M${'$'} \\
162+
\multicolumn{1}{l}{} & ${'$'}n${'$'} & \mapsto & ${'$'}a(n)${'$'}.
163+
\end{tabular}
164+
165+
Ich bin über die Entwicklung sehr froh.
166+
""".trimIndent()
167+
)
168+
myFixture.checkHighlighting()
169+
}
170+
171+
/*
172+
* These rules are not enabled by default in Grazie Lite, but do show up by default in Grazie Pro.
173+
* To find a rule id, search for the name in https://community.languagetool.org/rule/list and use the id together with the prefix from LangTool.globalIdPrefix
174+
*/
175+
176+
fun testCommaInSentence() {
177+
GrazieConfig.update { it.copy(userEnabledRules = setOf("LanguageTool.EN.COMMA_PARENTHESIS_WHITESPACE")) }
178+
myFixture.configureByText(LatexFileType, """\label{fig} Similar to the structure presented in \autoref{fig}, it is.""")
179+
myFixture.checkHighlighting()
180+
}
181+
182+
fun testCommandsInSentence() {
183+
GrazieConfig.update { it.copy(userEnabledRules = setOf("LanguageTool.EN.CONSECUTIVE_SPACES")) }
184+
myFixture.configureByText(LatexFileType, """The principles of a generic \ac{PID} controller.""")
185+
myFixture.checkHighlighting()
186+
}
187+
188+
/*
189+
* Grazie Pro
190+
*
191+
* These tests only test false positives in Grazie Pro (com.intellij.grazie.pro.style.StyleInspection), but that is not possible to test at the moment: https://youtrack.jetbrains.com/issue/GRZ-5023
192+
* So we test the excluded ranges directly.
193+
*/
194+
195+
/**
196+
* Text as sent to Grazie.
197+
*/
198+
private fun getSubmittedText(file: PsiFile): String {
199+
return LatexTextExtractor().buildTextContent(file.firstChildOfType(LatexContent::class)!!).toString()
200+
}
201+
202+
fun testNewlinesShouldBeKept() {
203+
val text = """
204+
\section{First}
205+
\section{Second}
206+
""".trimIndent()
207+
myFixture.configureByText(LatexFileType, text)
208+
val submittedText = getSubmittedText(myFixture.file)
209+
assertEquals(
210+
"""
211+
First
212+
Second
213+
""".trimIndent(),
214+
submittedText
215+
)
216+
}
142217
}

test/resources/inspections/grazie/CheckGrammarInConstructs.tex

Lines changed: 0 additions & 5 deletions
This file was deleted.

test/resources/inspections/grazie/MultilineCheckGrammar.tex

Lines changed: 0 additions & 4 deletions
This file was deleted.

0 commit comments

Comments
 (0)