Skip to content

Commit edcc677

Browse files
committed
More efficient file handling
1 parent efeb584 commit edcc677

File tree

4 files changed

+137
-124
lines changed

4 files changed

+137
-124
lines changed

src/basetype/check.rs

Lines changed: 32 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,37 @@
11
use crate::{read_bytes, Mime};
22
use fnv::FnvHashMap;
3-
use std::path::Path;
3+
use std::fs::File;
44

55
pub(crate) struct BaseType;
66

77
impl crate::Checker for BaseType {
8-
fn match_bytes(&self, file: &[u8], mimetype: &str) -> bool {
9-
match_bytes(file, mimetype)
10-
}
11-
12-
fn match_filepath(&self, filepath: &Path, mimetype: &str) -> bool {
13-
match_filepath(filepath, mimetype)
8+
fn match_bytes(&self, bytes: &[u8], mimetype: &str) -> bool {
9+
if mimetype == "application/octet-stream" || mimetype == "all/allfiles" {
10+
// Both of these are the case if we have a bytestream at all
11+
return true;
12+
}
13+
if mimetype == "text/plain" {
14+
is_text_plain_from_u8(bytes)
15+
} else {
16+
// ...how did we get bytes for this?
17+
false
18+
}
19+
}
20+
21+
fn match_file(&self, file: &File, mimetype: &str) -> bool {
22+
// Being bad with error handling here,
23+
// but if you can't open it, it's probably not a file.
24+
let Ok(meta) = file.metadata() else {
25+
return false;
26+
};
27+
28+
match mimetype {
29+
"all/all" => true,
30+
"all/allfiles" | "application/octet-stream" => meta.is_file(),
31+
"inode/directory" => meta.is_dir(),
32+
"text/plain" => is_text_plain_from_file(file),
33+
_ => false,
34+
}
1435
}
1536

1637
fn get_supported(&self) -> Vec<Mime> {
@@ -27,46 +48,14 @@ impl crate::Checker for BaseType {
2748
}
2849

2950
/// Returns `false` if there are any null bytes; otherwise returns `true`.
30-
fn is_text_plain_from_u8(b: &[u8]) -> bool {
31-
memchr::memchr(0, b).is_none()
51+
fn is_text_plain_from_u8(bytes: &[u8]) -> bool {
52+
memchr::memchr(0, bytes).is_none()
3253
}
3354

3455
// TODO: Hoist the main logic here somewhere else. This'll get redundant fast!
35-
fn is_text_plain_from_filepath(filepath: &Path) -> bool {
36-
let Ok(bytes) = read_bytes(filepath, 512) else {
56+
fn is_text_plain_from_file(file: &File) -> bool {
57+
let Ok(bytes) = read_bytes(file, 512) else {
3758
return false;
3859
};
3960
is_text_plain_from_u8(&bytes)
4061
}
41-
42-
#[allow(unused_variables)]
43-
pub fn match_bytes(b: &[u8], mimetype: &str) -> bool {
44-
if mimetype == "application/octet-stream" || mimetype == "all/allfiles" {
45-
// Both of these are the case if we have a bytestream at all
46-
return true;
47-
}
48-
if mimetype == "text/plain" {
49-
is_text_plain_from_u8(b)
50-
} else {
51-
// ...how did we get bytes for this?
52-
false
53-
}
54-
}
55-
56-
pub fn match_filepath(filepath: &Path, mimetype: &str) -> bool {
57-
use std::fs;
58-
59-
// Being bad with error handling here,
60-
// but if you can't open it, it's probably not a file.
61-
let Ok(meta) = fs::metadata(filepath) else {
62-
return false;
63-
};
64-
65-
match mimetype {
66-
"all/all" => true,
67-
"all/allfiles" | "application/octet-stream" => meta.is_file(),
68-
"inode/directory" => meta.is_dir(),
69-
"text/plain" => is_text_plain_from_filepath(filepath),
70-
_ => false,
71-
}
72-
}

src/fdo_magic/builtin/check.rs

Lines changed: 29 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,41 @@ use super::ALL_RULES;
22
use crate::{fdo_magic::check::from_u8_walker, read_bytes, Mime};
33
use fnv::FnvHashMap;
44
use petgraph::prelude::*;
5-
use std::path::Path;
5+
use std::fs::File;
66

77
pub(crate) struct FdoMagic;
88

99
impl crate::Checker for FdoMagic {
10-
fn match_bytes(&self, file: &[u8], mimetype: &str) -> bool {
11-
match_bytes(file, mimetype)
10+
fn match_bytes(&self, bytes: &[u8], mimetype: &str) -> bool {
11+
// Get magic ruleset
12+
let Some(graph) = ALL_RULES.get(mimetype) else {
13+
return false;
14+
};
15+
16+
// Check all rulesets
17+
graph
18+
.externals(Incoming)
19+
.any(|node| from_u8_walker(bytes, graph, node, true))
1220
}
1321

14-
fn match_filepath(&self, filepath: &Path, mimetype: &str) -> bool {
15-
match_filepath(filepath, mimetype)
22+
fn match_file(&self, file: &File, mimetype: &str) -> bool {
23+
// Get magic ruleset
24+
let Some(magic_rules) = ALL_RULES.get(mimetype) else {
25+
return false;
26+
};
27+
28+
// Get # of bytes to read
29+
let scanlen = magic_rules
30+
.node_weights()
31+
.map(|rule| rule.scan_len())
32+
.max()
33+
.unwrap_or(0);
34+
35+
let Ok(bytes) = read_bytes(file, scanlen) else {
36+
return false;
37+
};
38+
39+
self.match_bytes(&bytes, mimetype)
1640
}
1741

1842
fn get_supported(&self) -> Vec<Mime> {
@@ -27,40 +51,3 @@ impl crate::Checker for FdoMagic {
2751
super::init::get_aliaslist()
2852
}
2953
}
30-
31-
/// Test against all rules
32-
#[allow(unused_variables)]
33-
pub fn match_bytes(file: &[u8], mimetype: &str) -> bool {
34-
// Get magic ruleset
35-
let Some(graph) = ALL_RULES.get(mimetype) else {
36-
return false;
37-
};
38-
39-
// Check all rulesets
40-
graph
41-
.externals(Incoming)
42-
.any(|node| from_u8_walker(file, graph, node, true))
43-
}
44-
45-
/// This only exists for the case of a direct match_filepath call
46-
/// and even then we could probably get rid of this...
47-
#[allow(unused_variables)]
48-
pub fn match_filepath(filepath: &Path, mimetype: &str) -> bool {
49-
// Get magic ruleset
50-
let Some(magic_rules) = ALL_RULES.get(mimetype) else {
51-
return false;
52-
};
53-
54-
// Get # of bytes to read
55-
let scanlen = magic_rules
56-
.node_weights()
57-
.map(|rule| rule.scan_len())
58-
.max()
59-
.unwrap_or(0);
60-
61-
let Ok(bytes) = read_bytes(filepath, scanlen) else {
62-
return false;
63-
};
64-
65-
match_bytes(&bytes, mimetype)
66-
}

src/fdo_magic/check.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
use petgraph::prelude::*;
22
use std::iter::zip;
33

4-
fn from_u8_singlerule(file: &[u8], rule: &super::MagicRule) -> bool {
4+
fn from_u8_singlerule(bytes: &[u8], rule: &super::MagicRule) -> bool {
55
// Check if we're even in bounds
66
let bound_min = rule.start_off as usize;
77
let bound_max = rule.start_off as usize + rule.val.len() + rule.region_len as usize;
88

9-
if (file.len()) < bound_max {
9+
if (bytes.len()) < bound_max {
1010
return false;
1111
}
12-
let testarea = &file[bound_min..bound_max];
12+
let testarea = &bytes[bound_min..bound_max];
1313

1414
testarea.windows(rule.val.len()).any(|window| {
1515
// Apply mask to value
@@ -27,7 +27,7 @@ fn from_u8_singlerule(file: &[u8], rule: &super::MagicRule) -> bool {
2727
/// Test every given rule by walking graph
2828
/// TODO: Not loving the code duplication here.
2929
pub fn from_u8_walker(
30-
file: &[u8],
30+
bytes: &[u8],
3131
graph: &DiGraph<super::MagicRule, u32>,
3232
node: NodeIndex,
3333
isroot: bool,
@@ -38,7 +38,7 @@ pub fn from_u8_walker(
3838
let rule = &graph[node];
3939

4040
// Check root
41-
if !from_u8_singlerule(file, rule) {
41+
if !from_u8_singlerule(bytes, rule) {
4242
return false;
4343
}
4444

@@ -54,10 +54,10 @@ pub fn from_u8_walker(
5454
for y in n {
5555
let rule = &graph[y];
5656

57-
if from_u8_singlerule(file, rule) {
57+
if from_u8_singlerule(bytes, rule) {
5858
// Check next indent level if needed
5959
if graph.neighbors_directed(y, Outgoing).count() != 0 {
60-
return from_u8_walker(file, graph, y, false);
60+
return from_u8_walker(bytes, graph, y, false);
6161
// Next indent level is lower, so this must be it
6262
} else {
6363
return true;

0 commit comments

Comments
 (0)