Skip to content

Commit

Permalink
Add --regex, and --first flags. Plus bug fixes
Browse files Browse the repository at this point in the history
Fixed an issue with rogue newlines appearing in command output.
  • Loading branch information
sean0x42 committed Jan 30, 2020
1 parent 0969936 commit fefd0a8
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 34 deletions.
16 changes: 12 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,24 @@ Patch notes are automatically extracted from this changelog whenever a tag is
pushed to the GitHub repository. The tag name must match a heading exactly.


## Next Release

- Add `--regex` flag, which enables the use of regular expressions to search
for section titles.
- Add `--first` flag, which only prints the first matching section.
- Fix an issue where extra newlines were inserted into the final output.


## v0.1.1

- Publish as a binary instead of a library
- Publish as a binary instead of a library


## v0.1.0-alpha

This version is the initial release of `markdown_extract`! It features the
following:

- Extract sections from a markdown document
- Run from the command line
- Use as a Rust library
- Extract sections from a markdown document
- Run from the command line
- Use as a Rust library
55 changes: 37 additions & 18 deletions src/bin.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,41 @@
use markdown_extract::{Document, Parser, Section};
pub mod document;
mod matchers;
mod parser;

use document::{Document, Section};
use matchers::{Matcher, RegexMatcher, SimpleMatcher};
use parser::Parser;
use std::convert::TryInto;
use std::error::Error;
use std::fs::File;
use std::path::PathBuf;
use structopt::StructOpt;

/// Extracts sections of a markdown file.
/// Extract sections of a markdown file.
#[derive(StructOpt)]
#[structopt(name = "markdown-extract")]
struct Opts {
/// Title is case sensitive
pub struct Opts {
/// Only return the first match
#[structopt(short, long)]
first: bool,

/// Compile pattern as a regular expression.
///
/// Documentation for the regex syntax can be found at
/// <https://docs.rs/regex/1.3.3/regex/index.html#syntax>
#[structopt(short, long)]
regex: bool,

/// Treat pattern as case sensitive
#[structopt(short = "s", long)]
case_sensitive: bool,

/// Do not include the top level section heading
#[structopt(short, long)]
ignore_first_heading: bool,

/// A title to find in section headings
title: String,
/// Pattern to match against section headings
pattern: String,

/// Path to markdown file
#[structopt(parse(from_os_str))]
Expand All @@ -35,7 +52,7 @@ fn print_section(document: &Document, section: &Section, ignore_first_heading: b
section.title
);
}
println!("{}", section.body);
println!("{}", section.body.join("\n"));

// Print children
for child in &section.children {
Expand All @@ -52,24 +69,26 @@ fn run() -> Result<(), Box<dyn Error>> {
let file = File::open(&opts.path)?;
let document = parser.parse_file(file)?;

let matches: Document = document
.iter()
.filter(|section| {
if opts.case_sensitive {
section.title.trim() == opts.title.trim()
} else {
section.title.to_lowercase().trim() == opts.title.to_lowercase().trim()
}
})
.cloned()
.collect();
// Match
let matches = if opts.regex {
RegexMatcher::get_matches(&document, &opts)
} else {
SimpleMatcher::get_matches(&document, &opts)
};

// Handle no matches
if matches.is_empty() {
println!("No matches.");
return Ok(());
}

// Only print the first match
if opts.first {
// It's okay to use `[0]` here since we check if the doc is empty above
print_section(&document, &matches[0], opts.ignore_first_heading);
return Ok(());
}

// Print matching sections
for section in matches {
print_section(&document, &section, opts.ignore_first_heading);
Expand Down
7 changes: 3 additions & 4 deletions src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub struct Section {

/// Raw markdown body.
/// Does not include any child sections. See the `children` property for child content.
pub body: String,
pub body: Vec<String>,

/// An optional pointer to a parent section.
/// This property should always be `Some`, unless the section is located at the root of the
Expand All @@ -28,16 +28,15 @@ impl Section {
Section {
level: 0,
title: String::new(),
body: String::new(),
body: Vec::new(),
parent: None,
children: Vec::new(),
}
}

/// Appends the given line to the section's body
pub fn append_to_body(&mut self, line: String) {
self.body.push_str(&line);
self.body.push('\n');
self.body.push(line);
}

/// Add a child to this section.
Expand Down
5 changes: 0 additions & 5 deletions src/lib.rs

This file was deleted.

7 changes: 4 additions & 3 deletions src/matchers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
mod regex;
mod simple;

pub use regex::RegexMatcher;
pub use simple::SimpleMatcher;
pub use self::regex::RegexMatcher;
use crate::document::Document;
use crate::Opts;
pub use simple::SimpleMatcher;

pub trait Matcher {
/// Find any sections within the document that match
pub fn match(document: &Document, options: &Opts) -> Document;
fn get_matches(document: &Document, opts: &Opts) -> Document;
}
24 changes: 24 additions & 0 deletions src/matchers/regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use super::Matcher;
use crate::document::Document;
use crate::Opts;
use regex::RegexBuilder;

/// Matcher that interprets the user-supplied pattern as a regular expression.
pub struct RegexMatcher;

impl Matcher for RegexMatcher {
    /// Compile `opts.pattern` as a regular expression and return a clone of
    /// every section in `document` whose title it matches.
    ///
    /// Matching is case insensitive unless `opts.case_sensitive` is set.
    ///
    /// If the pattern is not a valid regular expression (or its compiled form
    /// exceeds the size limit below), an error message is written to stderr
    /// and the process exits with status 1.
    fn get_matches(document: &Document, opts: &Opts) -> Document {
        // Compile regex for provided pattern
        let re = RegexBuilder::new(&opts.pattern)
            .case_insensitive(!opts.case_sensitive)
            .size_limit(1024 * 100) // 100 kb
            .build()
            .unwrap_or_else(|err| {
                // The pattern comes straight from the command line, so a bad
                // pattern is a user error rather than a bug: report it
                // cleanly instead of panicking.
                eprintln!("Invalid regular expression `{}`: {}", opts.pattern, err);
                std::process::exit(1)
            });

        document
            .iter()
            .filter(|section| re.is_match(&section.title))
            .cloned()
            .collect()
    }
}
22 changes: 22 additions & 0 deletions src/matchers/simple.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use super::Matcher;
use crate::document::Document;
use crate::Opts;

/// Matcher that compares the user-supplied pattern against section titles
/// for plain string equality.
pub struct SimpleMatcher;

impl Matcher for SimpleMatcher {
    /// Performs a simple pattern == title match.
    ///
    /// Returns a clone of every section in `document` whose title equals
    /// `opts.pattern` after both have been trimmed of surrounding whitespace.
    /// Unless `opts.case_sensitive` is set, both sides are lowercased before
    /// the comparison.
    fn get_matches(document: &Document, opts: &Opts) -> Document {
        // Normalise the pattern once up front; it is the same for every
        // section, so there is no need to redo this inside the filter.
        let wanted = if opts.case_sensitive {
            opts.pattern.trim().to_owned()
        } else {
            opts.pattern.to_lowercase().trim().to_owned()
        };

        document
            .iter()
            .filter(|section| {
                if opts.case_sensitive {
                    section.title.trim() == wanted.as_str()
                } else {
                    section.title.to_lowercase().trim() == wanted.as_str()
                }
            })
            .cloned()
            .collect()
    }
}

0 comments on commit fefd0a8

Please sign in to comment.