diff --git a/CHANGELOG.md b/CHANGELOG.md index 391b052..6aec808 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,17 @@ Patch notes are automatically extracted from this changelog whenever a tag is pushed to the GitHub repository. The tag name must match a heading exactly. +## Next Release + +- Add `--regex` flag, which enables the use of regular expressions to search + for section titles. +- Add `--first` flag, which only prints the first matching section. +- Fix an issue where extra newlines were inserted into the final output. + + ## v0.1.1 - - Publish as a binary instead of a library +- Publish as a binary instead of a library ## v0.1.0-alpha @@ -14,6 +22,6 @@ pushed to the GitHub repository. The tag name must match a heading exactly. This version is the initial release of `markdown_extract`! It features the following: - - Extract sections from a markdown document - - Run from the command line - - Use as a Rust library +- Extract sections from a markdown document +- Run from the command line +- Use as a Rust library diff --git a/src/bin.rs b/src/bin.rs index 1a43446..d6db069 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -1,15 +1,32 @@ -use markdown_extract::{Document, Parser, Section}; +pub mod document; +mod matchers; +mod parser; + +use document::{Document, Section}; +use matchers::{Matcher, RegexMatcher, SimpleMatcher}; +use parser::Parser; use std::convert::TryInto; use std::error::Error; use std::fs::File; use std::path::PathBuf; use structopt::StructOpt; -/// Extracts sections of a markdown file. +/// Extract sections of a markdown file. #[derive(StructOpt)] #[structopt(name = "markdown-extract")] -struct Opts { - /// Title is case sensitive +pub struct Opts { + /// Only return the first match + #[structopt(short, long)] + first: bool, + + /// Compile pattern as a regular expression. 
+ /// + /// Documentation for the regex syntax can be found at + /// <https://docs.rs/regex/latest/regex/index.html#syntax> + #[structopt(short, long)] + regex: bool, + + /// Treat pattern as case sensitive #[structopt(short = "s", long)] case_sensitive: bool, @@ -17,8 +34,8 @@ struct Opts { #[structopt(short, long)] ignore_first_heading: bool, - /// A title to find in section headings - title: String, + /// Pattern to match against section headings + pattern: String, /// Path to markdown file #[structopt(parse(from_os_str))] @@ -35,7 +52,7 @@ fn print_section(document: &Document, section: &Section, ignore_first_heading: b section.title ); } - println!("{}", section.body); + println!("{}", section.body.join("\n")); // Print children for child in &section.children { @@ -52,17 +69,12 @@ fn run() -> Result<(), Box<dyn Error>> { let file = File::open(&opts.path)?; let document = parser.parse_file(file)?; - let matches: Document = document - .iter() - .filter(|section| { - if opts.case_sensitive { - section.title.trim() == opts.title.trim() - } else { - section.title.to_lowercase().trim() == opts.title.to_lowercase().trim() - } - }) - .cloned() - .collect(); + // Match + let matches = if opts.regex { + RegexMatcher::get_matches(&document, &opts) + } else { + SimpleMatcher::get_matches(&document, &opts) + }; // Handle no matches if matches.is_empty() { @@ -70,6 +82,13 @@ fn run() -> Result<(), Box<dyn Error>> { return Ok(()); } + // Only print the first match + if opts.first { + // It's okay to use `[0]` here since we check if the doc is empty above + print_section(&document, &matches[0], opts.ignore_first_heading); + return Ok(()); + } + // Print matching sections for section in matches { print_section(&document, &section, opts.ignore_first_heading); diff --git a/src/document.rs b/src/document.rs index e1fe664..806b909 100644 --- a/src/document.rs +++ b/src/document.rs @@ -11,7 +11,7 @@ pub struct Section { /// Raw markdown body. /// Does not include any child sections. See the `children` property for child content. 
- pub body: String, + pub body: Vec<String>, /// An optional pointer to a parent section. /// This property should always be `Some`, unless the section is located at the root of the @@ -28,7 +28,7 @@ impl Section { Section { level: 0, title: String::new(), - body: String::new(), + body: Vec::new(), parent: None, children: Vec::new(), } @@ -36,8 +36,7 @@ impl Section { /// Appends the given line to the section's body pub fn append_to_body(&mut self, line: String) { - self.body.push_str(&line); - self.body.push('\n'); + self.body.push(line); } /// Add a child to this section. diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 583ab7c..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod document; -pub mod parser; - -pub use document::{Document, Section}; -pub use parser::Parser; diff --git a/src/matchers/mod.rs b/src/matchers/mod.rs index ce0e5d5..7152769 100644 --- a/src/matchers/mod.rs +++ b/src/matchers/mod.rs @@ -1,11 +1,12 @@ mod regex; mod simple; -pub use regex::RegexMatcher; -pub use simple::SimpleMatcher; +pub use self::regex::RegexMatcher; +use crate::document::Document; use crate::Opts; +pub use simple::SimpleMatcher; pub trait Matcher { /// Find any sections within the document that match - pub fn match(document: &Document, options: &Opts) -> Document; + fn get_matches(document: &Document, opts: &Opts) -> Document; } diff --git a/src/matchers/regex.rs b/src/matchers/regex.rs new file mode 100644 index 0000000..ac8b75d --- /dev/null +++ b/src/matchers/regex.rs @@ -0,0 +1,24 @@ +use super::Matcher; +use crate::document::Document; +use crate::Opts; +use regex::RegexBuilder; + +pub struct RegexMatcher; + +impl Matcher for RegexMatcher { + /// Compile the pattern as a regular expression + fn get_matches(document: &Document, opts: &Opts) -> Document { + // Compile regex for provided pattern + let re = RegexBuilder::new(&opts.pattern) + .case_insensitive(!opts.case_sensitive) + .size_limit(1024 * 100) // 100 kb + .build() + .unwrap(); + + 
document + .iter() + .filter(|section| re.is_match(&section.title)) + .cloned() + .collect() + } +} diff --git a/src/matchers/simple.rs b/src/matchers/simple.rs new file mode 100644 index 0000000..ef4ab73 --- /dev/null +++ b/src/matchers/simple.rs @@ -0,0 +1,22 @@ +use super::Matcher; +use crate::document::Document; +use crate::Opts; + +pub struct SimpleMatcher; + +impl Matcher for SimpleMatcher { + /// Performs a simple pattern == title match + fn get_matches(document: &Document, opts: &Opts) -> Document { + document + .iter() + .filter(|section| { + if opts.case_sensitive { + section.title.trim() == opts.pattern.trim() + } else { + section.title.to_lowercase().trim() == opts.pattern.to_lowercase().trim() + } + }) + .cloned() + .collect() + } +}