Skip to content

Commit

Permalink
Implemented plain text lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
vallentin committed Jul 13, 2023
1 parent c48da03 commit 2be4f86
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 5 deletions.
13 changes: 8 additions & 5 deletions colorblast/src/lexers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,34 @@ pub mod prelude {
mod json;
mod jsonc;
mod rust;
mod text;

pub use self::json::*;
pub use self::jsonc::*;
pub use self::rust::*;
pub use self::text::*;

use crate::{IntoSimpleToken, SimpleTokenIter, Token, TokenSpan};

macro_rules! impl_enum_lexer {
(
$(
$(#[$attr:meta])*
$name:ident => $lexer:ident,
)+
$name:ident => $lexer:ident
),+ $(,)?
) => {
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
#[non_exhaustive]
pub enum Lexer {
$(
$(#[$attr])*
$name,
)+
$name
),+
}

impl Lexer {
pub const VARIANTS: &[Self] = &[
$(Self::$name,)+
$(Self::$name),+
];

pub fn into_lexer<'text>(
Expand All @@ -67,6 +69,7 @@ impl_enum_lexer!(
/// [JSON with Comments]: https://code.visualstudio.com/docs/languages/json#_json-with-comments
JsonC => JsonCLexer,
Rust => RustLexer,
PlainText => PlainTextLexer,
);

macro_rules! impl_iter {
Expand Down
57 changes: 57 additions & 0 deletions colorblast/src/lexers/text.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
use super::{impl_iter, Token, TokenSpan};

/// Plain text lexer is a simple dummy passthrough tokenizer,
/// which produces at most a single <code>[Token]::[Text]</code>.
///
/// **Note:** Cloning `PlainTextLexer` is essentially a copy, as it
/// just contains an `Option<&str>`. However, `Copy` is not implemented,
/// to avoid accidentally copying immutable `PlainTextLexer`s.
///
/// # Warning
///
/// If you are about to use `PlainTextLexer` for anything outside the scope of the
/// [`colorblast` crate], then please see the warning in the [`lexers` module].
///
/// [`colorblast` crate]: crate
/// [`lexers` module]: super#warning
/// [Text]: Token::Text
#[derive(Clone, Debug)]
pub struct PlainTextLexer<'text> {
// Input that has not been handed out yet: `Some` after construction,
// `None` once `next_token` has taken it (whether or not a token was produced).
text: Option<&'text str>,
}

impl<'text> PlainTextLexer<'text> {
    /// Creates a lexer that has not yet handed out `text`.
    #[inline]
    pub fn new(text: &'text str) -> Self {
        let text = Some(text);
        Self { text }
    }

    /// Hands out the stored input as one `Token::Text`, at most once.
    ///
    /// The stored text is taken out on every call, so any call after the
    /// first returns `None`. Empty input also returns `None` rather than
    /// an empty token. Otherwise the span is built from the whole text and
    /// the range `0..text.len()`.
    #[inline]
    fn next_token(&mut self) -> Option<(Token, TokenSpan<'text>)> {
        match self.text.take() {
            Some(whole) if !whole.is_empty() => {
                let span = TokenSpan::new(whole, 0..whole.len());
                Some((Token::Text, span))
            }
            // Either already consumed, or the input was empty.
            _ => None,
        }
    }
}

// Expands the parent module's `impl_iter!` macro for `PlainTextLexer<'text>`.
// NOTE(review): the macro body is not visible here; presumably it provides the
// iterator impl that drives `next_token` (the tests iterate the lexer directly)
// — confirm against the macro definition in `lexers/mod.rs`.
impl_iter!('text, PlainTextLexer<'text>);

#[cfg(test)]
mod tests {
    use super::*;

    /// Gluing together every span the lexer yields must reproduce the
    /// input text exactly.
    #[test]
    fn test_plain_text_lexer_spans() {
        let input = include_str!("../../../text-scanner/src/ext/rust.rs");

        let mut output = String::new();
        for (_, piece) in PlainTextLexer::new(input) {
            output += piece.as_str();
        }

        assert_eq!(input, output);
    }
}
1 change: 1 addition & 0 deletions colorblast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ macro_rules! impl_enum_token {
impl_enum_token!(
Space,
Comment,
/// Token representing text, which might contain whitespace.
Text,
Var,
Var2,
Expand Down

0 comments on commit 2be4f86

Please sign in to comment.