From e9e2c3afdee63ae2cf6e6de469fe8c49ae405e8e Mon Sep 17 00:00:00 2001 From: Nikolay Arhipov Date: Sat, 9 Mar 2024 11:51:43 +0200 Subject: [PATCH 1/4] Support code blocks --- src/generator.rs | 28 ++++++++---- src/lexer.rs | 40 ++++++++++------- src/parser.rs | 114 ++++++++++++++++++++++++++++++----------------- 3 files changed, 116 insertions(+), 66 deletions(-) diff --git a/src/generator.rs b/src/generator.rs index 6d6d8ac..22d00e3 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -42,20 +42,22 @@ pub fn rustdoc(input: String) -> Result { str } - GrammarItem::Text(v) => if group_started { - v.replacen("*", "", 1) - } else { - v - }, + GrammarItem::Text(v) => { + if group_started { + v.replacen("*", "", 1) + } else { + v + } + } // See GrammarItem::GroupStart => { group_started = true; String::from("# ") - }, + } GrammarItem::GroupEnd => { group_started = false; - continue - }, + continue; + } }; } @@ -171,6 +173,8 @@ fn generate_notation( "par" => String::from("# "), "details" | "pre" | "post" => String::from("\n\n"), "brief" | "short" => String::new(), + "code" => String::from("```"), + "endcode" => String::from("```"), _ => String::new(), }, (new_param, new_return, new_throw), @@ -359,6 +363,14 @@ mod test { ); } + #[test] + fn code() { + test_rustdoc!( + "@code\nfn main() {\n test( [1] ); // @code @throw\n@endcode", + "```\nfn main() {\n test( [1] ); // @code @throw\n```" + ); + } + #[test] fn can_parse_example() { let example = include_str!("../tests/assets/example-bindgen.rs"); diff --git a/src/lexer.rs b/src/lexer.rs index 589753f..9c917ed 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -3,10 +3,22 @@ pub(crate) enum LexItem { At(String), Paren(char), Word(String), - Space, + Whitespace(char), NewLine, } +impl LexItem { + pub(crate) fn push_to(&self, acc: &mut String) { + match self { + LexItem::At(s) => acc.push_str(s), + LexItem::Paren(c) => acc.push(*c), + LexItem::Word(s) => acc.push_str(s), + LexItem::Whitespace(c) => acc.push(*c), + LexItem::NewLine => acc.push('\n'), + } + } +} + pub(crate) fn lex(input: String) -> Vec { let mut result = vec![]; @@ -34,12 +46,8 @@ pub(crate) fn lex(input: String) -> Vec { '{' | '}' => { result.push(LexItem::Paren(c)); } - ' ' => { - if let Some(v) = result.last_mut() { - if !matches!(v, LexItem::Space) { - result.push(LexItem::Space); - } - } + ' ' | '\t' => { + result.push(LexItem::Whitespace(c)); } '\n' => { result.push(LexItem::NewLine); @@ -72,9 +80,9 @@ mod test { vec![ LexItem::At("@".into()), LexItem::Word("name".into()), - LexItem::Space, + LexItem::Whitespace(' '), LexItem::Word("Memory".into()), - LexItem::Space, + LexItem::Whitespace(' '), LexItem::Word("Management".into()) ] ); @@ -85,9 +93,9 @@ mod test { vec![ LexItem::At("\\".into()), LexItem::Word("name".into()), - LexItem::Space, + LexItem::Whitespace(' '), LexItem::Word("Memory".into()), - LexItem::Space, + LexItem::Whitespace(' '), LexItem::Word("Management".into()) ] ); @@ -98,9 +106,9 @@ mod test { vec![ LexItem::At("\\\\".into()), LexItem::Word("name".into()), - LexItem::Space, + LexItem::Whitespace(' '), LexItem::Word("Memory".into()), - LexItem::Space, + LexItem::Whitespace(' '), LexItem::Word("Management".into()) ] ); @@ -116,12 +124,12 @@ mod test { LexItem::Paren('{'), LexItem::NewLine, LexItem::Word("*".into()), - LexItem::Space, + LexItem::Whitespace(' '), LexItem::At("@".into()), LexItem::Word("name".into()), - LexItem::Space, + LexItem::Whitespace(' '), LexItem::Word("Memory".into()), - LexItem::Space, + LexItem::Whitespace(' '), LexItem::Word("Management".into()), LexItem::NewLine, LexItem::At("@".into()), diff --git a/src/parser.rs b/src/parser.rs index 8d772fb..db30e9d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -25,20 +25,44 @@ pub(crate) enum GrammarItem { } pub(crate) fn parse(input: String) -> Result, ParseError> { - let mut lexed = lex(input); - lexed.push(LexItem::Space); - lexed.push(LexItem::Space); - lexed.push(LexItem::Space); + let lexed = lex(input); parse_items(lexed) } fn parse_items(input: Vec) -> Result, ParseError> { let mut grammar_items = vec![]; - let mut param_iter_skip_count = 0; + let mut param_word_skip_count = 0; - for item in input.windows(4) { - let current = item.get(0).unwrap(); - let next = item.get(1); + for (index, current) in input.iter().enumerate() { + let rest = &input[index..]; + let next = rest.get(1); + + if matches!(current, LexItem::Word(_)) && param_word_skip_count > 0 { + param_word_skip_count -= 1; + continue; + } + + // Do not do any formatting inside of code blocks + let ends_code = matches!(current, LexItem::At(_)) + && matches!(next, Some(LexItem::Word(v)) if v == "endcode"); + if !ends_code { + match &mut grammar_items[..] { + [.., GrammarItem::Notation { tag, .. }] if tag == "code" => { + let mut text = String::new(); + current.push_to(&mut text); + + grammar_items.push(GrammarItem::Text(text)); + continue; + } + [.., GrammarItem::Notation { tag, .. }, GrammarItem::Text(text)] + if tag == "code" => + { + current.push_to(text); + continue; + } + _ => {} + } + } match current { LexItem::At(_) => { @@ -56,9 +80,10 @@ fn parse_items(input: Vec) -> Result, ParseError> { }, LexItem::Word(v) => { let mut meta = vec![]; - let params; let content; + let expects_params; + if v.starts_with("param") { let value = v.split('[').collect::>(); match value.get(1) { @@ -79,34 +104,36 @@ fn parse_items(input: Vec) -> Result, ParseError> { }, } - params = match item.get(3) { - None => vec![], - Some(LexItem::Word(v)) => vec![v.into()], - Some(_) => vec![], - }; - - content = "param" + content = "param"; + expects_params = true; } else { content = v; - params = match v.as_str() { + expects_params = match v.as_str() { "a" | "b" | "c" | "p" | "emoji" | "e" | "em" | "def" | "class" | "category" | "concept" | "enum" | "example" | "extends" | "file" | "sa" | "see" | "retval" - | "exception" | "throw" | "throws" => match item.get(3) { - None => vec![], - Some(LexItem::Word(v)) => vec![v.into()], - Some(_) => vec![], - }, - _ => vec![], + | "exception" | "throw" | "throws" => true, + _ => false, }; } - if params.is_empty() { - param_iter_skip_count = 1; + let params = if expects_params { + rest.iter().skip(2).find_map(|next| match next { + LexItem::Word(word) => Some(word), + _ => None, + }) } else { - param_iter_skip_count = 2; - } + None + }; + + let params = if let Some(word) = params { + param_word_skip_count = 2; + vec![word.into()] + } else { + param_word_skip_count = 1; + vec![] + }; grammar_items.push(GrammarItem::Notation { meta, @@ -119,11 +146,6 @@ fn parse_items(input: Vec) -> Result, ParseError> { } } LexItem::Word(v) => { - if param_iter_skip_count > 0 { - param_iter_skip_count -= 1; - continue; - } - if let Some(prev) = grammar_items.last_mut() { match prev { GrammarItem::Text(text) => *text += v, @@ -133,16 +155,20 @@ fn parse_items(input: Vec) -> Result, ParseError> { grammar_items.push(GrammarItem::Text(v.into())); } } - LexItem::Space => { - if let Some(prev) = grammar_items.last_mut() { - match prev { - GrammarItem::Text(text) => *text += " ", - _ => grammar_items.push(GrammarItem::Text("".into())), - } - } else { - grammar_items.push(GrammarItem::Text(" ".into())) + LexItem::Whitespace(whitespace) => match &mut grammar_items[..] { + [.., GrammarItem::Notation { tag, .. }] if tag == "code" => { + grammar_items.push(GrammarItem::Text((*whitespace).into())) } - } + [.., GrammarItem::Notation { tag, .. }, GrammarItem::Text(text)] + if tag == "code" => + { + text.push(*whitespace) + } + [.., GrammarItem::Text(text)] if text.ends_with(' ') => {} + [.., GrammarItem::Text(text)] => text.push(' '), + [] => grammar_items.push(GrammarItem::Text(' '.into())), + _ => grammar_items.push(GrammarItem::Text("".into())), + }, LexItem::NewLine => { if let Some(GrammarItem::Text(text)) = grammar_items.last_mut() { *text += "\n" @@ -233,7 +259,11 @@ mod test { #[test] pub fn trims_param_texts() { - let result = parse("@param[in] var Example description".into()).unwrap(); + let result = parse( + "@param[in] var Example description" + .into(), + ) + .unwrap(); assert_eq!( result, vec![ From 5071ed5859cfdea7060d7ca4677e9b6f57d9871c Mon Sep 17 00:00:00 2001 From: Nikolay Arhipov Date: Sat, 9 Mar 2024 13:12:08 +0200 Subject: [PATCH 2/4] Support language in code blocks --- src/generator.rs | 14 ++++- src/parser.rs | 140 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 123 insertions(+), 31 deletions(-) diff --git a/src/generator.rs b/src/generator.rs index 22d00e3..2e3af1e 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -173,7 +173,11 @@ fn generate_notation( "par" => String::from("# "), "details" | "pre" | "post" => String::from("\n\n"), "brief" | "short" => String::new(), - "code" => String::from("```"), + "code" => { + let lang = params.get(0).map(|p| p.as_str()).unwrap_or_default(); + let lang = lang.strip_prefix('.').unwrap_or(lang); + format!("```{lang}") + } "endcode" => String::from("```"), _ => String::new(), }, @@ -371,6 +375,14 @@ mod test { ); } + #[test] + fn code_with_lang() { + test_rustdoc!( + "@code{.rs}\nfn main() {\n test( [1] ); // @code @throw\n@endcode", + "```rs\nfn main() {\n test( [1] ); // @code @throw\n```" + ); + } + #[test] fn can_parse_example() { let example = include_str!("../tests/assets/example-bindgen.rs"); diff --git a/src/parser.rs b/src/parser.rs index db30e9d..f1d477c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -24,6 +24,12 @@ pub(crate) enum GrammarItem { GroupEnd, } +enum ParamParser { + None, + Whitespace, + Paren, +} + pub(crate) fn parse(input: String) -> Result, ParseError> { let lexed = lex(input); parse_items(lexed) @@ -31,14 +37,14 @@ pub(crate) fn parse(input: String) -> Result, ParseError> { fn parse_items(input: Vec) -> Result, ParseError> { let mut grammar_items = vec![]; - let mut param_word_skip_count = 0; + let mut param_iter_skip_count = 0; for (index, current) in input.iter().enumerate() { let rest = &input[index..]; let next = rest.get(1); - if matches!(current, LexItem::Word(_)) && param_word_skip_count > 0 { - param_word_skip_count -= 1; + if param_iter_skip_count > 0 { + param_iter_skip_count -= 1; continue; } @@ -82,7 +88,7 @@ fn parse_items(input: Vec) -> Result, ParseError> { let mut meta = vec![]; let content; - let expects_params; + let param_parser; if v.starts_with("param") { let value = v.split('[').collect::>(); @@ -105,33 +111,45 @@ fn parse_items(input: Vec) -> Result, ParseError> { } content = "param"; - expects_params = true; + param_parser = ParamParser::Whitespace; } else { content = v; - expects_params = match v.as_str() { + param_parser = match v.as_str() { "a" | "b" | "c" | "p" | "emoji" | "e" | "em" | "def" | "class" | "category" | "concept" | "enum" | "example" | "extends" | "file" | "sa" | "see" | "retval" - | "exception" | "throw" | "throws" => true, - _ => false, + | "exception" | "throw" | "throws" => ParamParser::Whitespace, + "code" => ParamParser::Paren, + _ => ParamParser::None, }; } - let params = if expects_params { - rest.iter().skip(2).find_map(|next| match next { - LexItem::Word(word) => Some(word), + let param = match param_parser { + ParamParser::None => None, + ParamParser::Whitespace => rest + .iter() + .enumerate() + .skip(2) + .skip_while(|(_, next)| matches!(next, LexItem::Whitespace(_))) + .next() + .and_then(|(skip, next)| match next { + LexItem::Word(word) => Some((skip, word)), + _ => None, + }), + ParamParser::Paren => match &rest { + [_, _, LexItem::Paren('{'), LexItem::Word(word), LexItem::Paren('}'), ..] => { + Some((4, word)) + } _ => None, - }) - } else { - None + }, }; - let params = if let Some(word) = params { - param_word_skip_count = 2; + let params = if let Some((skip, word)) = param { + param_iter_skip_count = skip; vec![word.into()] } else { - param_word_skip_count = 1; + param_iter_skip_count = 1; vec![] }; @@ -155,20 +173,20 @@ fn parse_items(input: Vec) -> Result, ParseError> { grammar_items.push(GrammarItem::Text(v.into())); } } - LexItem::Whitespace(whitespace) => match &mut grammar_items[..] { - [.., GrammarItem::Notation { tag, .. }] if tag == "code" => { - grammar_items.push(GrammarItem::Text((*whitespace).into())) - } - [.., GrammarItem::Notation { tag, .. }, GrammarItem::Text(text)] - if tag == "code" => - { - text.push(*whitespace) + LexItem::Whitespace(_) => { + if let Some(prev) = grammar_items.last_mut() { + match prev { + GrammarItem::Text(text) if text.ends_with(' ') => {} + GrammarItem::Text(text) => *text += " ", + GrammarItem::Notation { params, .. } if !params.is_empty() => { + grammar_items.push(GrammarItem::Text(" ".into())) + } + _ => grammar_items.push(GrammarItem::Text("".into())), + } + } else { + grammar_items.push(GrammarItem::Text(" ".into())); } - [.., GrammarItem::Text(text)] if text.ends_with(' ') => {} - [.., GrammarItem::Text(text)] => text.push(' '), - [] => grammar_items.push(GrammarItem::Text(' '.into())), - _ => grammar_items.push(GrammarItem::Text("".into())), - }, + } LexItem::NewLine => { if let Some(GrammarItem::Text(text)) = grammar_items.last_mut() { *text += "\n" @@ -238,6 +256,24 @@ mod test { ); } + #[test] + pub fn param_tabs() { + let result = + parse("@param[in]\trandom\t\t\tThis is, without a doubt, a random argument.".into()) + .unwrap(); + assert_eq!( + result, + vec![ + GrammarItem::Notation { + meta: vec!["in".into()], + params: vec!["random".into()], + tag: "param".into(), + }, + GrammarItem::Text(" This is, without a doubt, a random argument.".into()) + ] + ); + } + #[test] pub fn groups() { let result = parse("@{\n* @name Memory Management\n@}".into()).unwrap(); @@ -276,4 +312,48 @@ mod test { ] ) } + + #[test] + pub fn code() { + let result = parse("@code\nfn main() {}\n@endcode".into()).unwrap(); + + assert_eq!( + result, + vec![ + GrammarItem::Notation { + meta: vec![], + params: vec![], + tag: "code".into(), + }, + GrammarItem::Text("\nfn main() {}\n".into()), + GrammarItem::Notation { + meta: vec![], + params: vec![], + tag: "endcode".into(), + }, + ] + ) + } + + #[test] + pub fn code_with_param() { + let result = parse("@code{.py}\nfn main() {}\n@endcode".into()).unwrap(); + + assert_eq!( + result, + vec![ + GrammarItem::Notation { + meta: vec![], + params: vec![".py".into()], + tag: "code".into(), + }, + GrammarItem::Text("\nfn main() {}\n".into()), + GrammarItem::Notation { + meta: vec![], + params: vec![], + tag: "endcode".into(), + }, + ] + ) + } } From 080e3203baaa6d5350658fb42c9165667b733551 Mon Sep 17 00:00:00 2001 From: Nikolay Arhipov Date: Sat, 9 Mar 2024 13:14:37 +0200 Subject: [PATCH 3/4] Clippy --- src/generator.rs | 2 +- src/parser.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/generator.rs b/src/generator.rs index 2e3af1e..dfd4017 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -174,7 +174,7 @@ fn generate_notation( "details" | "pre" | "post" => String::from("\n\n"), "brief" | "short" => String::new(), "code" => { - let lang = params.get(0).map(|p| p.as_str()).unwrap_or_default(); + let lang = params.first().map(|p| p.as_str()).unwrap_or_default(); let lang = lang.strip_prefix('.').unwrap_or(lang); format!("```{lang}") } diff --git a/src/parser.rs b/src/parser.rs index f1d477c..e7038ad 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -131,8 +131,7 @@ fn parse_items(input: Vec) -> Result, ParseError> { .iter() .enumerate() .skip(2) - .skip_while(|(_, next)| matches!(next, LexItem::Whitespace(_))) - .next() + .find(|(_, next)| !matches!(next, LexItem::Whitespace(_))) .and_then(|(skip, next)| match next { LexItem::Word(word) => Some((skip, word)), _ => None, From 66758336c5cb4e0390c8d3419c3029a7b57d892a Mon Sep 17 00:00:00 2001 From: Nikolay Arhipov Date: Sat, 9 Mar 2024 16:15:16 +0200 Subject: [PATCH 4/4] Newlines after code blocks should remain --- src/parser.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index e7038ad..5345fca 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -157,6 +157,10 @@ fn parse_items(input: Vec) -> Result, ParseError> { params, tag: content.into(), }); + + if content == "endcode" { + grammar_items.push(GrammarItem::Text("".into())); + } } _ => {} } @@ -330,6 +334,7 @@ mod test { params: vec![], tag: "endcode".into(), }, + GrammarItem::Text("".into()) ] ) } @@ -352,6 +357,36 @@ mod test { params: vec![], tag: "endcode".into(), }, + GrammarItem::Text("".into()) + ] + ) + } + + #[test] + pub fn code_with_args() { + let result = parse("@code\nfn main() {}\n@endcode\n\n@param[in] a - a".into()).unwrap(); + + assert_eq!( + result, + vec![ + GrammarItem::Notation { + meta: vec![], + params: vec![], + tag: "code".into(), + }, + GrammarItem::Text("\nfn main() {}\n".into()), + GrammarItem::Notation { + meta: vec![], + params: vec![], + tag: "endcode".into(), + }, + GrammarItem::Text("\n\n".into()), + GrammarItem::Notation { + meta: vec!["in".into()], + params: vec!["a".into()], + tag: "param".into() + }, + GrammarItem::Text(" - a".into()) ] ) }