Skip to content

Commit

Permalink
save
Browse files Browse the repository at this point in the history
  • Loading branch information
nsthorat committed Dec 19, 2023
1 parent e15f2e6 commit 28f5045
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions lilac/signals/markdown_code_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
from ..schema import Field, Item, RichData, field, span
from ..signal import TextSignal

# Pattern for a fenced markdown code block.
#   group 1: the text right after the opening ``` up to the end of that line
#            (no spaces or newlines; may be empty) -- i.e. the language tag.
#   group 2: the block body between the opening line and the closing ```,
#            matched lazily so each block ends at the first closing fence.
# NOTE: `.` only spans multiple lines when the pattern is compiled with
# re.DOTALL.
MARKDOWN_RE = '```([^\n ]*?)\n(.*?)\n```'


class MarkdownCodeBlockSignal(TextSignal):
"""Finds markdown blocks in text. Emits the language of the block with the span."""

name: ClassVar[str] = 'markdown_code_block'
display_name: ClassVar[str] = 'Markdown Code Block Detection'

markdown_block_re = re.compile('```([^\n ]*?)\n(.*?)\n```', re.MULTILINE | re.DOTALL)

@override
def fields(self) -> Field:
return field(
Expand All @@ -33,11 +33,12 @@ def fields(self) -> Field:

@override
def compute(self, data: Iterable[RichData]) -> Iterator[Optional[Item]]:
markdown_re = re.compile(MARKDOWN_RE, re.MULTILINE | re.DOTALL)
for doc in data:
text = cast(str, doc)
# Get the spans
markdown_re_spans = self.markdown_block_re.finditer(text)
languages = self.markdown_block_re.findall(text)
markdown_re_spans = markdown_re.finditer(text)
languages = markdown_re.findall(text)

spans: list[Item] = []
for re_span, (language, _) in zip(markdown_re_spans, languages):
Expand Down

0 comments on commit 28f5045

Please sign in to comment.