Skip to content

Commit da586ab

Browse files
committed
Move codespell:ignore check into Spellchecker
This makes the API automatically honor inline `codespell:ignore` comments, so API callers skip the same declared false positives that the command-line tool already filters out.
1 parent 99a9aad commit da586ab

File tree

2 files changed: 23 additions and 15 deletions.

codespell_lib/_codespell.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@
5959
"(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
6060
"\\b[\\w.%+-]+@[\\w.-]+\\b)"
6161
)
62-
inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
6362
USAGE = """
6463
\t%prog [OPTIONS] [file1 file2 ... fileN]
6564
"""
@@ -952,20 +951,10 @@ def parse_file(
952951
if not line or line in exclude_lines:
953952
continue
954953

955-
extra_words_to_ignore = set()
956-
match = inline_ignore_regex.search(line)
957-
if match:
958-
extra_words_to_ignore = set(
959-
filter(None, (match.group("words") or "").split(","))
960-
)
961-
if not extra_words_to_ignore:
962-
continue
963-
964954
fixed_words = set()
965955
asked_for = set()
966956

967-
issues = spellchecker.spellcheck_line(line, line_tokenizer, extra_words_to_ignore=extra_words_to_ignore)
968-
for issue in issues:
957+
for issue in spellchecker.spellcheck_line(line, line_tokenizer):
969958
misspelling = issue.misspelling
970959
word = issue.word
971960
lword = issue.lword

codespell_lib/spellchecker.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from typing import (
2222
Container,
2323
Dict,
24+
FrozenSet,
2425
Generic,
2526
Iterable,
2627
Optional,
@@ -108,6 +109,8 @@
108109

109110
_builtin_default_as_tuple = tuple(_builtin_default.split(","))
110111

112+
_inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
113+
111114

112115
class UnknownBuiltinDictionaryError(ValueError):
113116
def __init__(self, name: str) -> None:
@@ -173,12 +176,21 @@ def __init__(self) -> None:
173176
self._misspellings: Dict[str, Misspelling] = {}
174177
self.ignore_words_cased: Container[str] = frozenset()
175178

179+
def _parse_inline_ignore(self, line: str) -> Optional[FrozenSet[str]]:
180+
inline_ignore_match = _inline_ignore_regex.search(line)
181+
if inline_ignore_match:
182+
words = frozenset(
183+
filter(None, (inline_ignore_match.group("words") or "").split(","))
184+
)
185+
return words if words else None
186+
return frozenset()
187+
176188
def spellcheck_line(
177189
self,
178190
line: str,
179191
tokenizer: LineTokenizer[T_co],
180192
*,
181-
extra_words_to_ignore: Container[str] = frozenset()
193+
respect_inline_ignore: bool = True,
182194
) -> Iterable[DetectedMisspelling[T_co]]:
183195
"""Tokenize and spellcheck a line
184196
@@ -187,12 +199,19 @@ def spellcheck_line(
187199
188200
:param line: The line to spellcheck.
189201
:param tokenizer: A callable that will tokenize the line
190-
:param extra_words_to_ignore: Extra words to ignore for this particular line
191-
(such as content from a `codespell:ignore` comment)
202+
:param respect_inline_ignore: Whether to check the line for
203+
`codespell:ignore` instructions
204+
:returns: An iterable of discovered typos.
192205
"""
193206
misspellings = self._misspellings
194207
ignore_words_cased = self.ignore_words_cased
195208

209+
extra_words_to_ignore = (
210+
self._parse_inline_ignore(line) if respect_inline_ignore else frozenset()
211+
)
212+
if extra_words_to_ignore is None:
213+
return
214+
196215
for token in tokenizer(line):
197216
word = token.group()
198217
if word in ignore_words_cased:

0 commit comments

Comments
 (0)