From e9e2c3afdee63ae2cf6e6de469fe8c49ae405e8e Mon Sep 17 00:00:00 2001
From: Nikolay Arhipov <n@arhipov.net>
Date: Sat, 9 Mar 2024 11:51:43 +0200
Subject: [PATCH 1/4] Support code blocks

---
 src/generator.rs |  28 ++++++++----
 src/lexer.rs     |  40 ++++++++++-------
 src/parser.rs    | 114 ++++++++++++++++++++++++++++++-----------------
 3 files changed, 116 insertions(+), 66 deletions(-)
diff --git a/src/generator.rs b/src/generator.rs
index 6d6d8ac..22d00e3 100644
--- a/src/generator.rs
+++ b/src/generator.rs
@@ -42,20 +42,22 @@ pub fn rustdoc(input: String) -> Result<String, ParseError> {
 
                 str
             }
-            GrammarItem::Text(v) => if group_started {
-                v.replacen("*", "", 1)
-            } else {
-                v
-            },
+            GrammarItem::Text(v) => {
+                if group_started {
+                    v.replacen("*", "", 1)
+                } else {
+                    v
+                }
+            }
             // See <https://stackoverflow.com/a/40354789>
             GrammarItem::GroupStart => {
                 group_started = true;
                 String::from("# ")
-            },
+            }
             GrammarItem::GroupEnd => {
                 group_started = false;
-                continue
-            },
+                continue;
+            }
         };
     }
 
@@ -171,6 +173,8 @@ fn generate_notation(
             "par" => String::from("# "),
             "details" | "pre" | "post" => String::from("\n\n"),
             "brief" | "short" => String::new(),
+            "code" => String::from("```"),
+            "endcode" => String::from("```"),
             _ => String::new(),
         },
         (new_param, new_return, new_throw),
@@ -359,6 +363,14 @@ mod test {
         );
     }
 
+    #[test]
+    fn code() {
+        test_rustdoc!(
+            "@code\nfn main() {\n        test( [1] ); // @code @throw\n@endcode",
+            "```\nfn main() {\n        test( [1] ); // @code @throw\n```"
+        );
+    }
+
     #[test]
     fn can_parse_example() {
         let example = include_str!("../tests/assets/example-bindgen.rs");
diff --git a/src/lexer.rs b/src/lexer.rs
index 589753f..9c917ed 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -3,10 +3,22 @@ pub(crate) enum LexItem {
     At(String),
     Paren(char),
     Word(String),
-    Space,
+    Whitespace(char),
     NewLine,
 }
 
+impl LexItem {
+    /// Appends the text this token stands for to the end of `acc`.
+    pub(crate) fn push_to(&self, acc: &mut String) {
+        match self {
+            LexItem::At(text) | LexItem::Word(text) => acc.push_str(text),
+            LexItem::Paren(ch) | LexItem::Whitespace(ch) => acc.push(*ch),
+            // A line break carries no payload, so emit it literally.
+            LexItem::NewLine => acc.push('\n'),
+        }
+    }
+}
+
 pub(crate) fn lex(input: String) -> Vec<LexItem> {
     let mut result = vec![];
 
@@ -34,12 +46,8 @@ pub(crate) fn lex(input: String) -> Vec<LexItem> {
             '{' | '}' => {
                 result.push(LexItem::Paren(c));
             }
-            ' ' => {
-                if let Some(v) = result.last_mut() {
-                    if !matches!(v, LexItem::Space) {
-                        result.push(LexItem::Space);
-                    }
-                }
+            ' ' | '\t' => {
+                result.push(LexItem::Whitespace(c));
             }
             '\n' => {
                 result.push(LexItem::NewLine);
@@ -72,9 +80,9 @@ mod test {
             vec![
                 LexItem::At("@".into()),
                 LexItem::Word("name".into()),
-                LexItem::Space,
+                LexItem::Whitespace(' '),
                 LexItem::Word("Memory".into()),
-                LexItem::Space,
+                LexItem::Whitespace(' '),
                 LexItem::Word("Management".into())
             ]
         );
@@ -85,9 +93,9 @@ mod test {
             vec![
                 LexItem::At("\\".into()),
                 LexItem::Word("name".into()),
-                LexItem::Space,
+                LexItem::Whitespace(' '),
                 LexItem::Word("Memory".into()),
-                LexItem::Space,
+                LexItem::Whitespace(' '),
                 LexItem::Word("Management".into())
             ]
         );
@@ -98,9 +106,9 @@ mod test {
             vec![
                 LexItem::At("\\\\".into()),
                 LexItem::Word("name".into()),
-                LexItem::Space,
+                LexItem::Whitespace(' '),
                 LexItem::Word("Memory".into()),
-                LexItem::Space,
+                LexItem::Whitespace(' '),
                 LexItem::Word("Management".into())
             ]
         );
@@ -116,12 +124,12 @@ mod test {
                 LexItem::Paren('{'),
                 LexItem::NewLine,
                 LexItem::Word("*".into()),
-                LexItem::Space,
+                LexItem::Whitespace(' '),
                 LexItem::At("@".into()),
                 LexItem::Word("name".into()),
-                LexItem::Space,
+                LexItem::Whitespace(' '),
                 LexItem::Word("Memory".into()),
-                LexItem::Space,
+                LexItem::Whitespace(' '),
                 LexItem::Word("Management".into()),
                 LexItem::NewLine,
                 LexItem::At("@".into()),
diff --git a/src/parser.rs b/src/parser.rs
index 8d772fb..db30e9d 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -25,20 +25,44 @@ pub(crate) enum GrammarItem {
 }
 
 pub(crate) fn parse(input: String) -> Result<Vec<GrammarItem>, ParseError> {
-    let mut lexed = lex(input);
-    lexed.push(LexItem::Space);
-    lexed.push(LexItem::Space);
-    lexed.push(LexItem::Space);
+    let lexed = lex(input);
     parse_items(lexed)
 }
 
 fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
     let mut grammar_items = vec![];
-    let mut param_iter_skip_count = 0;
+    let mut param_word_skip_count = 0;
 
-    for item in input.windows(4) {
-        let current = item.get(0).unwrap();
-        let next = item.get(1);
+    for (index, current) in input.iter().enumerate() {
+        let rest = &input[index..];
+        let next = rest.get(1);
+
+        if matches!(current, LexItem::Word(_)) && param_word_skip_count > 0 {
+            param_word_skip_count -= 1;
+            continue;
+        }
+
+        // Do not do any formatting inside of code blocks
+        let ends_code = matches!(current, LexItem::At(_))
+            && matches!(next, Some(LexItem::Word(v)) if v == "endcode");
+        if !ends_code {
+            match &mut grammar_items[..] {
+                [.., GrammarItem::Notation { tag, .. }] if tag == "code" => {
+                    let mut text = String::new();
+                    current.push_to(&mut text);
+
+                    grammar_items.push(GrammarItem::Text(text));
+                    continue;
+                }
+                [.., GrammarItem::Notation { tag, .. }, GrammarItem::Text(text)]
+                    if tag == "code" =>
+                {
+                    current.push_to(text);
+                    continue;
+                }
+                _ => {}
+            }
+        }
 
         match current {
             LexItem::At(_) => {
@@ -56,9 +80,10 @@ fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
                         },
                         LexItem::Word(v) => {
                             let mut meta = vec![];
-                            let params;
                             let content;
 
+                            let expects_params;
+
                             if v.starts_with("param") {
                                 let value = v.split('[').collect::<Vec<_>>();
                                 match value.get(1) {
@@ -79,34 +104,36 @@ fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
                                     },
                                 }
 
-                                params = match item.get(3) {
-                                    None => vec![],
-                                    Some(LexItem::Word(v)) => vec![v.into()],
-                                    Some(_) => vec![],
-                                };
-
-                                content = "param"
+                                content = "param";
+                                expects_params = true;
                             } else {
                                 content = v;
 
-                                params = match v.as_str() {
+                                expects_params = match v.as_str() {
                                     "a" | "b" | "c" | "p" | "emoji" | "e" | "em" | "def"
                                     | "class" | "category" | "concept" | "enum" | "example"
                                     | "extends" | "file" | "sa" | "see" | "retval"
-                                    | "exception" | "throw" | "throws" => match item.get(3) {
-                                        None => vec![],
-                                        Some(LexItem::Word(v)) => vec![v.into()],
-                                        Some(_) => vec![],
-                                    },
-                                    _ => vec![],
+                                    | "exception" | "throw" | "throws" => true,
+                                    _ => false,
                                 };
                             }
 
-                            if params.is_empty() {
-                                param_iter_skip_count = 1;
+                            let params = if expects_params {
+                                rest.iter().skip(2).find_map(|next| match next {
+                                    LexItem::Word(word) => Some(word),
+                                    _ => None,
+                                })
                             } else {
-                                param_iter_skip_count = 2;
-                            }
+                                None
+                            };
+
+                            let params = if let Some(word) = params {
+                                param_word_skip_count = 2;
+                                vec![word.into()]
+                            } else {
+                                param_word_skip_count = 1;
+                                vec![]
+                            };
 
                             grammar_items.push(GrammarItem::Notation {
                                 meta,
@@ -119,11 +146,6 @@ fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
                 }
             }
             LexItem::Word(v) => {
-                if param_iter_skip_count > 0 {
-                    param_iter_skip_count -= 1;
-                    continue;
-                }
-
                 if let Some(prev) = grammar_items.last_mut() {
                     match prev {
                         GrammarItem::Text(text) => *text += v,
@@ -133,16 +155,20 @@ fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
                     grammar_items.push(GrammarItem::Text(v.into()));
                 }
             }
-            LexItem::Space => {
-                if let Some(prev) = grammar_items.last_mut() {
-                    match prev {
-                        GrammarItem::Text(text) => *text += " ",
-                        _ => grammar_items.push(GrammarItem::Text("".into())),
-                    }
-                } else {
-                    grammar_items.push(GrammarItem::Text(" ".into()))
+            LexItem::Whitespace(whitespace) => match &mut grammar_items[..] {
+                [.., GrammarItem::Notation { tag, .. }] if tag == "code" => {
+                    grammar_items.push(GrammarItem::Text((*whitespace).into()))
                 }
-            }
+                [.., GrammarItem::Notation { tag, .. }, GrammarItem::Text(text)]
+                    if tag == "code" =>
+                {
+                    text.push(*whitespace)
+                }
+                [.., GrammarItem::Text(text)] if text.ends_with(' ') => {}
+                [.., GrammarItem::Text(text)] => text.push(' '),
+                [] => grammar_items.push(GrammarItem::Text(' '.into())),
+                _ => grammar_items.push(GrammarItem::Text("".into())),
+            },
             LexItem::NewLine => {
                 if let Some(GrammarItem::Text(text)) = grammar_items.last_mut() {
                     *text += "\n"
@@ -233,7 +259,11 @@ mod test {
 
     #[test]
     pub fn trims_param_texts() {
-        let result = parse("@param[in]           var                                         Example description".into()).unwrap();
+        let result = parse(
+            "@param[in]           var                                         Example description"
+                .into(),
+        )
+        .unwrap();
         assert_eq!(
             result,
             vec![

From 5071ed5859cfdea7060d7ca4677e9b6f57d9871c Mon Sep 17 00:00:00 2001
From: Nikolay Arhipov <n@arhipov.net>
Date: Sat, 9 Mar 2024 13:12:08 +0200
Subject: [PATCH 2/4] Support language in code blocks

---
 src/generator.rs |  14 ++++-
 src/parser.rs    | 140 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 123 insertions(+), 31 deletions(-)

diff --git a/src/generator.rs b/src/generator.rs
index 22d00e3..2e3af1e 100644
--- a/src/generator.rs
+++ b/src/generator.rs
@@ -173,7 +173,11 @@ fn generate_notation(
             "par" => String::from("# "),
             "details" | "pre" | "post" => String::from("\n\n"),
             "brief" | "short" => String::new(),
-            "code" => String::from("```"),
+            "code" => {
+                let lang = params.get(0).map(|p| p.as_str()).unwrap_or_default();
+                let lang = lang.strip_prefix('.').unwrap_or(lang);
+                format!("```{lang}")
+            }
             "endcode" => String::from("```"),
             _ => String::new(),
         },
@@ -371,6 +375,14 @@ mod test {
         );
     }
 
+    #[test]
+    fn code_with_lang() {
+        test_rustdoc!(
+            "@code{.rs}\nfn main() {\n        test( [1] ); // @code @throw\n@endcode",
+            "```rs\nfn main() {\n        test( [1] ); // @code @throw\n```"
+        );
+    }
+
     #[test]
     fn can_parse_example() {
         let example = include_str!("../tests/assets/example-bindgen.rs");
diff --git a/src/parser.rs b/src/parser.rs
index db30e9d..f1d477c 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -24,6 +24,12 @@ pub(crate) enum GrammarItem {
     GroupEnd,
 }
 
+enum ParamParser {
+    None,
+    Whitespace,
+    Paren,
+}
+
 pub(crate) fn parse(input: String) -> Result<Vec<GrammarItem>, ParseError> {
     let lexed = lex(input);
     parse_items(lexed)
@@ -31,14 +37,14 @@ pub(crate) fn parse(input: String) -> Result<Vec<GrammarItem>, ParseError> {
 
 fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
     let mut grammar_items = vec![];
-    let mut param_word_skip_count = 0;
+    let mut param_iter_skip_count = 0;
 
     for (index, current) in input.iter().enumerate() {
         let rest = &input[index..];
         let next = rest.get(1);
 
-        if matches!(current, LexItem::Word(_)) && param_word_skip_count > 0 {
-            param_word_skip_count -= 1;
+        if param_iter_skip_count > 0 {
+            param_iter_skip_count -= 1;
             continue;
         }
 
@@ -82,7 +88,7 @@ fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
                             let mut meta = vec![];
                             let content;
 
-                            let expects_params;
+                            let param_parser;
 
                             if v.starts_with("param") {
                                 let value = v.split('[').collect::<Vec<_>>();
@@ -105,33 +111,45 @@ fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
                                 }
 
                                 content = "param";
-                                expects_params = true;
+                                param_parser = ParamParser::Whitespace;
                             } else {
                                 content = v;
 
-                                expects_params = match v.as_str() {
+                                param_parser = match v.as_str() {
                                     "a" | "b" | "c" | "p" | "emoji" | "e" | "em" | "def"
                                     | "class" | "category" | "concept" | "enum" | "example"
                                     | "extends" | "file" | "sa" | "see" | "retval"
-                                    | "exception" | "throw" | "throws" => true,
-                                    _ => false,
+                                    | "exception" | "throw" | "throws" => ParamParser::Whitespace,
+                                    "code" => ParamParser::Paren,
+                                    _ => ParamParser::None,
                                 };
                             }
 
-                            let params = if expects_params {
-                                rest.iter().skip(2).find_map(|next| match next {
-                                    LexItem::Word(word) => Some(word),
+                            let param = match param_parser {
+                                ParamParser::None => None,
+                                ParamParser::Whitespace => rest
+                                    .iter()
+                                    .enumerate()
+                                    .skip(2)
+                                    .skip_while(|(_, next)| matches!(next, LexItem::Whitespace(_)))
+                                    .next()
+                                    .and_then(|(skip, next)| match next {
+                                        LexItem::Word(word) => Some((skip, word)),
+                                        _ => None,
+                                    }),
+                                ParamParser::Paren => match &rest {
+                                    [_, _, LexItem::Paren('{'), LexItem::Word(word), LexItem::Paren('}'), ..] => {
+                                        Some((4, word))
+                                    }
                                     _ => None,
-                                })
-                            } else {
-                                None
+                                },
                             };
 
-                            let params = if let Some(word) = params {
-                                param_word_skip_count = 2;
+                            let params = if let Some((skip, word)) = param {
+                                param_iter_skip_count = skip;
                                 vec![word.into()]
                             } else {
-                                param_word_skip_count = 1;
+                                param_iter_skip_count = 1;
                                 vec![]
                             };
 
@@ -155,20 +173,20 @@ fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
                     grammar_items.push(GrammarItem::Text(v.into()));
                 }
             }
-            LexItem::Whitespace(whitespace) => match &mut grammar_items[..] {
-                [.., GrammarItem::Notation { tag, .. }] if tag == "code" => {
-                    grammar_items.push(GrammarItem::Text((*whitespace).into()))
-                }
-                [.., GrammarItem::Notation { tag, .. }, GrammarItem::Text(text)]
-                    if tag == "code" =>
-                {
-                    text.push(*whitespace)
+            LexItem::Whitespace(_) => {
+                if let Some(prev) = grammar_items.last_mut() {
+                    match prev {
+                        GrammarItem::Text(text) if text.ends_with(' ') => {}
+                        GrammarItem::Text(text) => *text += " ",
+                        GrammarItem::Notation { params, .. } if !params.is_empty() => {
+                            grammar_items.push(GrammarItem::Text(" ".into()))
+                        }
+                        _ => grammar_items.push(GrammarItem::Text("".into())),
+                    }
+                } else {
+                    grammar_items.push(GrammarItem::Text(" ".into()));
                 }
-                [.., GrammarItem::Text(text)] if text.ends_with(' ') => {}
-                [.., GrammarItem::Text(text)] => text.push(' '),
-                [] => grammar_items.push(GrammarItem::Text(' '.into())),
-                _ => grammar_items.push(GrammarItem::Text("".into())),
-            },
+            }
             LexItem::NewLine => {
                 if let Some(GrammarItem::Text(text)) = grammar_items.last_mut() {
                     *text += "\n"
@@ -238,6 +256,24 @@ mod test {
         );
     }
 
+    #[test]
+    pub fn param_tabs() {
+        let result =
+            parse("@param[in]\trandom\t\t\tThis is, without a doubt, a random argument.".into())
+                .unwrap();
+        assert_eq!(
+            result,
+            vec![
+                GrammarItem::Notation {
+                    meta: vec!["in".into()],
+                    params: vec!["random".into()],
+                    tag: "param".into(),
+                },
+                GrammarItem::Text(" This is, without a doubt, a random argument.".into())
+            ]
+        );
+    }
+
     #[test]
     pub fn groups() {
         let result = parse("@{\n* @name Memory Management\n@}".into()).unwrap();
@@ -276,4 +312,48 @@ mod test {
             ]
         )
     }
+
+    #[test]
+    pub fn code() {
+        let result = parse("@code\nfn main() {}\n@endcode".into()).unwrap();
+
+        assert_eq!(
+            result,
+            vec![
+                GrammarItem::Notation {
+                    meta: vec![],
+                    params: vec![],
+                    tag: "code".into(),
+                },
+                GrammarItem::Text("\nfn main() {}\n".into()),
+                GrammarItem::Notation {
+                    meta: vec![],
+                    params: vec![],
+                    tag: "endcode".into(),
+                },
+            ]
+        )
+    }
+
+    #[test]
+    pub fn code_with_param() {
+        let result = parse("@code{.py}\nfn main() {}\n@endcode".into()).unwrap();
+
+        assert_eq!(
+            result,
+            vec![
+                GrammarItem::Notation {
+                    meta: vec![],
+                    params: vec![".py".into()],
+                    tag: "code".into(),
+                },
+                GrammarItem::Text("\nfn main() {}\n".into()),
+                GrammarItem::Notation {
+                    meta: vec![],
+                    params: vec![],
+                    tag: "endcode".into(),
+                },
+            ]
+        )
+    }
 }

From 080e3203baaa6d5350658fb42c9165667b733551 Mon Sep 17 00:00:00 2001
From: Nikolay Arhipov <n@arhipov.net>
Date: Sat, 9 Mar 2024 13:14:37 +0200
Subject: [PATCH 3/4] Fix Clippy lints (get_first, skip_while_next)

---
 src/generator.rs | 2 +-
 src/parser.rs    | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/generator.rs b/src/generator.rs
index 2e3af1e..dfd4017 100644
--- a/src/generator.rs
+++ b/src/generator.rs
@@ -174,7 +174,7 @@ fn generate_notation(
             "details" | "pre" | "post" => String::from("\n\n"),
             "brief" | "short" => String::new(),
             "code" => {
-                let lang = params.get(0).map(|p| p.as_str()).unwrap_or_default();
+                let lang = params.first().map(|p| p.as_str()).unwrap_or_default();
                 let lang = lang.strip_prefix('.').unwrap_or(lang);
                 format!("```{lang}")
             }
diff --git a/src/parser.rs b/src/parser.rs
index f1d477c..e7038ad 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -131,8 +131,7 @@ fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
                                     .iter()
                                     .enumerate()
                                     .skip(2)
-                                    .skip_while(|(_, next)| matches!(next, LexItem::Whitespace(_)))
-                                    .next()
+                                    .find(|(_, next)| !matches!(next, LexItem::Whitespace(_)))
                                     .and_then(|(skip, next)| match next {
                                         LexItem::Word(word) => Some((skip, word)),
                                         _ => None,

From 66758336c5cb4e0390c8d3419c3029a7b57d892a Mon Sep 17 00:00:00 2001
From: Nikolay Arhipov <n@arhipov.net>
Date: Sat, 9 Mar 2024 16:15:16 +0200
Subject: [PATCH 4/4] Newlines after code blocks should remain

---
 src/parser.rs | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/parser.rs b/src/parser.rs
index e7038ad..5345fca 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -157,6 +157,10 @@ fn parse_items(input: Vec<LexItem>) -> Result<Vec<GrammarItem>, ParseError> {
                                 params,
                                 tag: content.into(),
                             });
+
+                            if content == "endcode" {
+                                grammar_items.push(GrammarItem::Text("".into()));
+                            }
                         }
                         _ => {}
                     }
@@ -330,6 +334,7 @@ mod test {
                     params: vec![],
                     tag: "endcode".into(),
                 },
+                GrammarItem::Text("".into())
             ]
         )
     }
@@ -352,6 +357,36 @@ mod test {
                     params: vec![],
                     tag: "endcode".into(),
                 },
+                GrammarItem::Text("".into())
+            ]
+        )
+    }
+
+    #[test]
+    pub fn code_with_args() {
+        let result = parse("@code\nfn main() {}\n@endcode\n\n@param[in] a - a".into()).unwrap();
+
+        assert_eq!(
+            result,
+            vec![
+                GrammarItem::Notation {
+                    meta: vec![],
+                    params: vec![],
+                    tag: "code".into(),
+                },
+                GrammarItem::Text("\nfn main() {}\n".into()),
+                GrammarItem::Notation {
+                    meta: vec![],
+                    params: vec![],
+                    tag: "endcode".into(),
+                },
+                GrammarItem::Text("\n\n".into()),
+                GrammarItem::Notation {
+                    meta: vec!["in".into()],
+                    params: vec!["a".into()],
+                    tag: "param".into()
+                },
+                GrammarItem::Text(" - a".into())
             ]
         )
     }