Skip to content

Commit 8416fe9

Browse files
committed
0.7.28 - reduce domain name false positives (pii).
1 parent 84f2693 commit 8416fe9

File tree

3 files changed

+17
-7
lines changed

3 files changed

+17
-7
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "rustrict"
33
authors = ["Finn Bear"]
4-
version = "0.7.27"
4+
version = "0.7.28"
55
edition = "2021"
66
license = "MIT OR Apache-2.0"
77
repository = "https://github.com/finnbear/rustrict/"

src/context.rs

+9-3
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ use std::time::{Duration, Instant};
88

99
/// Context is useful for taking moderation actions on a per-user basis, i.e. each user would get
1010
/// their own Context.
11-
///
11+
///
1212
/// # Recommendation
13-
///
13+
///
1414
/// Use this as a reference implementation, e.g. by copying and adapting it.
1515
#[derive(Clone)]
1616
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
@@ -868,6 +868,12 @@ mod tests {
868868
let json = serde_json::to_value(&ctx).unwrap();
869869
let only_safe_until = &json["only_safe_until"];
870870
let unix = only_safe_until.as_i64().unwrap();
871-
assert!(unix > 1000 + SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_millis() as i64)
871+
assert!(
872+
unix > 1000
873+
+ SystemTime::now()
874+
.duration_since(SystemTime::UNIX_EPOCH)
875+
.unwrap()
876+
.as_millis() as i64
877+
)
872878
}
873879
}

src/pii.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ use std::borrow::Cow;
55
lazy_static! {
66
static ref PHONE : Regex = Regex::new(r#"(\+\d{1,2})?\s*\(?\d{3}\)?[\s\.-]*\d{3}[\s\.-]*\d{4}"#).unwrap();
77
static ref IP_ADDRESS : Regex = Regex::new(r#"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"#).unwrap();
8-
static ref EMAIL_ADDRESS : Regex = Regex::new(r#"(?i)[a-z0-9_\-]{3,}\s*(@|[\[\(\s]at[\s\)\]])\s*[a-z0-9_\-]{5,}\s*(\.|dot)\s*[a-z]{2,3}"#).unwrap();
8+
static ref EMAIL_ADDRESS : Regex = Regex::new(r#"(?i)[a-z0-9_\-]{3,}\s*(@|[\[\(\s]at[\s\)\]])\s*[a-z0-9_\-]{5,}\s*(\.|dot)\s*(com|net|org|gov|biz|co|us|ru|uk|de|se|to|tv|io|info|online|site)"#).unwrap();
99
//static ref ADDRESS : Regex = Regex::new(r#"(?i)\d+[ ](?:[A-Za-z0-9\.-]+ )+(?:Avenue|Lane|Road|Boulevard|Drive|Street|Ave|Dr|Rd|Blvd|Ln|St)\.?(\s+#[0-9]{1,5})?"#).unwrap();
1010
static ref NAME : Regex = Regex::new(r#"(?i)(real\s)?name\s+is:?\s[a-zA-Z]+(\s[a-zA-z]+)?"#).unwrap();
11-
static ref URL : Regex = Regex::new(r#"(?i)(https?:?/*)?[a-zA-Z0-9]{4,}\.[a-zA-Z]{2,3}"#).unwrap();
11+
static ref URL : Regex = Regex::new(r#"(?i)(https?:?/*)?[a-zA-Z0-9]{4,}\.(com|net|org|gov|biz|co|us|ru|uk|de|se|to|tv|io|info|online|site)"#).unwrap();
1212
}
1313

1414
/// Returns `s` with personally-identifiable information censored out, and `true` if
@@ -103,7 +103,11 @@ mod tests {
103103
for line in include_str!("./safe.txt")
104104
.lines()
105105
.chain(include_str!("./false_positives.txt").lines())
106-
.chain(r#"1234 Have 1234"#.lines())
106+
.chain(
107+
r#"1234 Have 1234
108+
gmail.zzz"#
109+
.lines(),
110+
)
107111
{
108112
assert!(!has_pii(line), "{line}");
109113
}

0 commit comments

Comments
 (0)