diff --git a/core/src/extra/mod.rs b/core/src/extra/mod.rs index c2ab546..4204cc4 100644 --- a/core/src/extra/mod.rs +++ b/core/src/extra/mod.rs @@ -1,6 +1,9 @@ +use crate::search::SearchSink; +use grep::{regex::RegexMatcher, searcher::Searcher}; use std::{error::Error, path::Path}; pub mod office; pub mod pdf; -pub type ExtractFn = fn(path: &Path) -> Result>; +pub type ExtraFn = + fn(&mut Searcher, &RegexMatcher, &Path, &mut SearchSink) -> Result<(), Box>; diff --git a/core/src/extra/office.rs b/core/src/extra/office.rs index 1d41224..6be2afb 100644 --- a/core/src/extra/office.rs +++ b/core/src/extra/office.rs @@ -1,9 +1,22 @@ +use crate::search::SearchSink; use dotext::{doc::OpenOfficeDoc, *}; +use grep::{regex::RegexMatcher, searcher::Searcher}; use std::{error::Error, io::Read, path::Path}; pub static EXTENSIONS: &[&str] = &["docx", "pptx", "xlsx", "odt", "odp", "ods"]; -pub fn extract(path: &Path) -> Result> { +pub fn process( + searcher: &mut Searcher, + matcher: &RegexMatcher, + path: &Path, + sink: &mut SearchSink, +) -> Result<(), Box> { + let text = extract(path)?; + searcher.search_slice(matcher, text.as_bytes(), sink)?; + Ok(()) +} + +fn extract(path: &Path) -> Result> { let ext = path .extension() .unwrap_or_default() diff --git a/core/src/extra/pdf.rs b/core/src/extra/pdf.rs index 7c41b23..4e70079 100644 --- a/core/src/extra/pdf.rs +++ b/core/src/extra/pdf.rs @@ -1,4 +1,6 @@ +use crate::search::SearchSink; use euclid::vec2; +use grep::{regex::RegexMatcher, searcher::Searcher}; use pdf_extract::{ encryption::DecryptionError, ConvertToFmt, Document, MediaBox, OutputDev, OutputError, Transform, @@ -7,7 +9,18 @@ use std::{error::Error, fmt::Write, panic::catch_unwind, path::Path}; pub static EXTENSIONS: &[&str] = &["pdf"]; -pub fn extract(path: &Path) -> Result> { +pub fn process( + searcher: &mut Searcher, + matcher: &RegexMatcher, + path: &Path, + sink: &mut SearchSink, +) -> Result<(), Box> { + let text = extract(path)?; + searcher.search_slice(matcher, text.as_bytes(), sink)?; + Ok(()) +} + +fn extract(path: &Path) -> Result> { let path = path.to_owned(); //because the library panics, we need to catch panics let res = catch_unwind(|| extract_text(&path)); diff --git a/core/src/search.rs b/core/src/search.rs index e2e613a..8a9fcef 100644 --- a/core/src/search.rs +++ b/core/src/search.rs @@ -10,6 +10,7 @@ use grep::{ }; use ignore::{WalkBuilder, WalkState}; use std::{ + error::Error, io, path::PathBuf, sync::{ @@ -71,12 +72,12 @@ pub fn run(engine: SearchEngine, params: SearchParameters) { .same_file_system(params.flags.same_filesystem) .build_parallel(); - let mut preprocessors: Vec<(_, extra::ExtractFn)> = Vec::new(); + let mut preprocessors: Vec<(_, extra::ExtraFn)> = Vec::new(); if params.flags.search_pdf { - preprocessors.push((extra::pdf::EXTENSIONS, extra::pdf::extract)); + preprocessors.push((extra::pdf::EXTENSIONS, extra::pdf::process)); } if params.flags.search_office { - preprocessors.push((extra::office::EXTENSIONS, extra::office::extract)); + preprocessors.push((extra::office::EXTENSIONS, extra::office::process)); } walker.run(|| { @@ -115,18 +116,7 @@ pub fn run(engine: SearchEngine, params: SearchParameters) { .map(|(_, extract_fn)| extract_fn); let search_result = match pre_processor { - Some(extract_fn) => { - let slice = extract_fn(entry.path()); - if let Err(err) = slice { - _ = engine.send_error( - search, - entry.path().to_path_buf(), - format!("failed to extract text from file: {}", err), - ); - return WalkState::Continue; - } - searcher.search_slice(&matcher, slice.unwrap().as_bytes(), &mut sink) - } + Some(process) => process(&mut searcher, &matcher, entry.path(), &mut sink), None => searcher.search_path(&matcher, entry.path(), &mut sink), }; @@ -156,7 +146,7 @@ pub fn run(engine: SearchEngine, params: SearchParameters) { _ = engine.sender.send(SearchMessage::Completed { search }); } -struct SearchSink { +pub struct SearchSink { matcher: RegexMatcher, entries: Vec, } @@ -173,7 +163,7 @@ impl SearchSink { std::mem::take(&mut self.entries) } - fn extract_matches( + pub fn extract_matches( &self, searcher: &grep::searcher::Searcher, bytes: &[u8], @@ -192,7 +182,7 @@ impl SearchSink { } impl grep::searcher::Sink for SearchSink { - type Error = io::Error; + type Error = Box; fn matched( &mut self,