From 6255eb95737919a92e24459df1fc2ac4be168be8 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Sat, 1 Feb 2025 20:40:43 +0300 Subject: [PATCH] feat(comment): support single-line simple prefix comments (#23) --- src/lexer.rs | 18 ++++++++++++++++++ src/parser.rs | 3 +++ tests/input/strip_comment.jsonc | 5 +++++ tests/output/strip_comment.jsonc | 5 +++++ 4 files changed, 31 insertions(+) create mode 100644 tests/input/strip_comment.jsonc create mode 100644 tests/output/strip_comment.jsonc diff --git a/src/lexer.rs b/src/lexer.rs index 81fa196..a19ae16 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -5,6 +5,11 @@ pub enum Paired { File, // start, end } +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum Comment { + Oneline, // // or # or ; +} + #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum Lexem { String, @@ -14,6 +19,7 @@ pub enum Lexem { Colon, Else, WhiteSpace, + Comment(Comment), } fn joinable(lexem: Lexem) -> bool { @@ -39,6 +45,7 @@ impl From for Lexem { b',' => Lexem::Comma, b':' | b'=' => Lexem::Colon, b'\'' | b'"' | b'`' => Lexem::String, + b';' | b'#' | b'/' => Lexem::Comment(Comment::Oneline), _ => { if character.is_ascii_whitespace() { Lexem::WhiteSpace @@ -158,6 +165,17 @@ impl Lexer { if cfg!(debug_assertions) { eprintln!("{character} {}", character as char); } + if let Some(Lexem::Comment(Comment::Oneline)) = self.lexem { + if character == b'\r' || character == b'\n' { + self.lexem = Some(Lexem::WhiteSpace); + return Some(( + Lexem::Comment(Comment::Oneline), + std::mem::replace(&mut self.state, vec![character]), + )); + } + self.state.push(character); + return None; + } if let Some(Lexem::String) = self.lexem { let first_char = self.state.first().cloned().unwrap_or_default(); let last_char = self.state.last().cloned().unwrap_or_default(); diff --git a/src/parser.rs b/src/parser.rs index d586199..5f2c730 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -143,6 +143,9 @@ impl Parser { eprintln!("LEXEM {lexem:?} {value:?}"); } let mut result = Vec::new(); + if let Lexem::Comment(_) = lexem { + return result; + } if let Lexem::WhiteSpace = lexem { self.whitespace.extend(value); return result; diff --git a/tests/input/strip_comment.jsonc b/tests/input/strip_comment.jsonc new file mode 100644 index 0000000..72347c3 --- /dev/null +++ b/tests/input/strip_comment.jsonc @@ -0,0 +1,5 @@ +{ + // c-style oneline comment + # python-style oneline comment + ; assembler-style comment +} diff --git a/tests/output/strip_comment.jsonc b/tests/output/strip_comment.jsonc new file mode 100644 index 0000000..abd2979 --- /dev/null +++ b/tests/output/strip_comment.jsonc @@ -0,0 +1,5 @@ +{ + + + +}