From f744690904bb5d7c077ed2f2f66379efa88d91b3 Mon Sep 17 00:00:00 2001 From: Shivani Bhardwaj Date: Fri, 29 Nov 2024 15:40:16 +0530 Subject: [PATCH 1/4] datasets: remove unused fn definition --- src/detect-dataset.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/detect-dataset.c b/src/detect-dataset.c index ae23925f2c11..173a69cb1bf0 100644 --- a/src/detect-dataset.c +++ b/src/detect-dataset.c @@ -47,8 +47,6 @@ #define DETECT_DATASET_CMD_ISNOTSET 2 #define DETECT_DATASET_CMD_ISSET 3 -int DetectDatasetMatch (ThreadVars *, DetectEngineThreadCtx *, Packet *, - const Signature *, const SigMatchCtx *); static int DetectDatasetSetup (DetectEngineCtx *, Signature *, const char *); void DetectDatasetFree (DetectEngineCtx *, void *); From b5184bd1ba1388958882e61561b68e863d9de5c7 Mon Sep 17 00:00:00 2001 From: Shivani Bhardwaj Date: Thu, 9 Jan 2025 12:16:09 +0530 Subject: [PATCH 2/4] rust: add macro to return val if unwrap fails --- rust/src/debug.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rust/src/debug.rs b/rust/src/debug.rs index caeecff93a68..16b128390f4c 100644 --- a/rust/src/debug.rs +++ b/rust/src/debug.rs @@ -236,3 +236,13 @@ macro_rules! debug_validate_fail ( } }; ); + +#[macro_export] +macro_rules! 
unwrap_or_return ( + ($e:expr, $r:expr) => { + match $e { + Ok(x) => x, + Err(_) => return $r, + } + }; +); From 22625b1f7fa9ae60e39cc7ff2932be3f6421d94f Mon Sep 17 00:00:00 2001 From: Shivani Bhardwaj Date: Thu, 9 Jan 2025 12:17:13 +0530 Subject: [PATCH 3/4] rust: add C callback for FatalErrorOnInit --- rust/src/debug.rs | 23 +++++++++++++++++++++-- src/util-debug.c | 5 +++++ src/util-debug.h | 2 ++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/rust/src/debug.rs b/rust/src/debug.rs index 16b128390f4c..72f7b737c310 100644 --- a/rust/src/debug.rs +++ b/rust/src/debug.rs @@ -20,12 +20,19 @@ use std::{ffi::CString, path::Path}; use crate::core::SC; +use std::os::raw::c_char; /// cbindgen:ignore -extern { +extern "C" { pub fn SCLogGetLogLevel() -> i32; } +// Defined in util-debug.c +/// cbindgen:ignore +extern "C" { + pub fn SCFatalErrorOnInitStatic(arg: *const c_char); +} + #[derive(Debug)] #[repr(C)] pub enum Level { @@ -62,6 +69,12 @@ fn basename(filename: &str) -> &str { return filename; } +pub fn fatalerror(message: &str) { + unsafe { + SCFatalErrorOnInitStatic(to_safe_cstring(message).as_ptr()); + } +} + pub fn sclog(level: Level, file: &str, line: u32, function: &str, message: &str) { let filename = basename(file); let noext = &filename[0..filename.len() - 3]; @@ -203,13 +216,19 @@ macro_rules! SCLogDebug { ($($arg:tt)*) => {}; } +#[macro_export] +macro_rules!SCFatalErrorOnInit { + ($($arg:tt)*) => { + $crate::debug::fatalerror(&format!($($arg)*)); + } +} + #[cfg(not(feature = "debug-validate"))] #[macro_export] macro_rules! debug_validate_bug_on ( ($item:expr) => {}; ); - #[cfg(feature = "debug-validate")] #[macro_export] macro_rules! 
debug_validate_bug_on ( diff --git a/src/util-debug.c b/src/util-debug.c index 56258a034f94..f315d1289048 100644 --- a/src/util-debug.c +++ b/src/util-debug.c @@ -1614,6 +1614,11 @@ void SCLogDeInitLogModule(void) #endif /* OS_WIN32 */ } +void SCFatalErrorOnInitStatic(const char *arg) +{ + FatalErrorOnInit("%s", arg); +} + //------------------------------------Unit_Tests-------------------------------- /* The logging engine should be tested to the maximum extent possible, since diff --git a/src/util-debug.h b/src/util-debug.h index 29beb912c927..6ed0eccf5b19 100644 --- a/src/util-debug.h +++ b/src/util-debug.h @@ -530,6 +530,8 @@ SCLogInitData *SCLogAllocLogInitData(void); void SCLogAppendOPIfaceCtx(SCLogOPIfaceCtx *, SCLogInitData *); +void SCFatalErrorOnInitStatic(const char *); + void SCLogInitLogModule(SCLogInitData *); void SCLogDeInitLogModule(void); From e13a1f3e726c42501e569cb28b576e4917c966a9 Mon Sep 17 00:00:00 2001 From: Shivani Bhardwaj Date: Thu, 9 Jan 2025 12:18:24 +0530 Subject: [PATCH 4/4] datasets: move initial file reading to rust In a recent warning reported by scan-build, datasets were found to be using a blocking call in a critical section. 
datasets.c:187:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 187 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:292:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 292 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:368:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 368 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:442:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 442 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:512:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 512 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 warnings generated. These calls block in multi-tenant mode, where several tenants may be trying to load the same dataset in parallel. In single-tenant mode, this operation is performed by a single thread before engine startup. To address the warning and simplify the code, this commit moves the initial file reading to Rust, with much simpler handling of dataset and datarep. 
Bug 7398 --- rust/cbindgen.toml | 1 + rust/src/detect/datasets.rs | 303 +++++++++++++++++++++++++++++++ rust/src/detect/mod.rs | 1 + src/datasets-reputation.h | 4 +- src/datasets.c | 353 +++--------------------------------- src/datasets.h | 1 + 6 files changed, 330 insertions(+), 333 deletions(-) create mode 100644 rust/src/detect/datasets.rs diff --git a/rust/cbindgen.toml b/rust/cbindgen.toml index eac6aa737760..e2730789c808 100644 --- a/rust/cbindgen.toml +++ b/rust/cbindgen.toml @@ -84,6 +84,7 @@ include = [ "FtpEvent", "SCSigTableElmt", "SCTransformTableElmt", + "DataRepType", ] # A list of items to not include in the generated bindings diff --git a/rust/src/detect/datasets.rs b/rust/src/detect/datasets.rs new file mode 100644 index 000000000000..2f4d968be200 --- /dev/null +++ b/rust/src/detect/datasets.rs @@ -0,0 +1,303 @@ +/* Copyright (C) 2025 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +// Author: Shivani Bhardwaj + +//! This module exposes items from the datasets C code to Rust. 
+ +use base64::{self, Engine}; +use std::ffi::{c_char, CStr}; +use std::fs::{File, OpenOptions}; +use std::io::{self, BufRead}; +use std::mem::transmute; +use std::net::{Ipv4Addr, Ipv6Addr}; +use std::path::Path; +use std::str::FromStr; + +/// Opaque Dataset type defined in C +#[derive(Copy, Clone)] +pub enum Dataset {} + +// Simple C type converted to Rust +#[derive(Debug, PartialEq)] +#[repr(C)] +pub struct DataRepType { + pub value: u16, +} + +#[derive(Debug)] +#[repr(C)] +pub enum DatasetType { + DSString = 0, + DSMd5, + DSSha256, + DSIpv4, + DSIpv6, +} + +// Extern fns operating on the opaque Dataset type above +/// cbindgen:ignore +extern "C" { + pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32; + pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType) + -> i32; +} + +#[no_mangle] +pub unsafe extern "C" fn ParseDatasets( + set: &Dataset, name: *const c_char, fname: *const c_char, fmode: *const c_char, + dstype: DatasetType, +) -> i32 { + let file_string = unwrap_or_return!(CStr::from_ptr(fname).to_str(), -2); + let mode = unwrap_or_return!(CStr::from_ptr(fmode).to_str(), -2); + let set_name = unwrap_or_return!(CStr::from_ptr(name).to_str(), -2); + let filename = Path::new(file_string); + let mut no_rep = false; + let mut with_rep = false; + let lines = match read_or_create_file(filename, mode) { + Ok(fp) => fp, + Err(_) => return -1, + }; + for line in lines.map_while(Result::ok) { + let v: Vec<&str> = line.split(',').collect(); + // Ignore empty and invalid lines in dataset/rep file + if v.is_empty() || v.len() > 2 { + continue; + } + + if v.len() == 1 { + if with_rep { + SCLogError!( + "Cannot mix dataset and datarep values for set {} in {}", + set_name, + filename.display() + ); + return -2; + } + // Dataset + no_rep = true; + } else { + if no_rep { + SCLogError!( + "Cannot mix dataset and datarep values for set {} in {}", + set_name, + filename.display() + ); + return -2; + } + // Datarep + with_rep = 
true; + } + match dstype { + DatasetType::DSString => { + if process_string_set(set, v, set_name, filename, no_rep) == -1 { + continue; + } + } + DatasetType::DSMd5 => { + if process_md5_set(set, v, set_name, filename, no_rep) == -1 { + continue; + } + } + DatasetType::DSSha256 => { + if process_sha256_set(set, v, set_name, filename, no_rep) == -1 { + continue; + } + } + DatasetType::DSIpv4 => { + if process_ipv4_set(set, v, set_name, filename, no_rep) == -1 { + continue; + } + } + DatasetType::DSIpv6 => { + if process_ipv6_set(set, v, set_name, filename, no_rep) == -1 { + continue; + } + } + } + } + + 0 +} + +unsafe fn process_string_set( + set: &Dataset, v: Vec<&str>, set_name: &str, filename: &Path, no_rep: bool, +) -> i32 { + let mut decoded: Vec = vec![]; + if base64::engine::general_purpose::STANDARD + .decode_vec(v[0], &mut decoded) + .is_err() + { + SCFatalErrorOnInit!("bad base64 encoding {} in {}", set_name, filename.display()); + return -1; + } + if no_rep { + DatasetAdd(set, decoded.as_ptr(), decoded.len() as u32); + } else if let Ok(val) = v[1].to_string().parse::() { + let rep: DataRepType = DataRepType { value: val }; + DatasetAddwRep(set, decoded.as_ptr(), decoded.len() as u32, &rep); + } else { + SCFatalErrorOnInit!( + "invalid datarep value {} in {}", + set_name, + filename.display() + ); + return -1; + } + 0 +} + +unsafe fn process_md5_set( + set: &Dataset, v: Vec<&str>, set_name: &str, filename: &Path, no_rep: bool, +) -> i32 { + let md5_string = match hex::decode(v[0]) { + Ok(rs) => rs, + Err(_) => return -1, + }; + + if no_rep { + DatasetAdd(set, md5_string.as_ptr(), 16); + } else if let Ok(val) = v[1].to_string().parse::() { + let rep: DataRepType = DataRepType { value: val }; + DatasetAddwRep(set, md5_string.as_ptr(), 16, &rep); + } else { + SCFatalErrorOnInit!( + "invalid datarep value {} in {}", + set_name, + filename.display() + ); + return -1; + } + 0 +} + +unsafe fn process_sha256_set( + set: &Dataset, v: Vec<&str>, set_name: &str, 
filename: &Path, no_rep: bool, +) -> i32 { + let sha256_string = match hex::decode(v[0]) { + Ok(rs) => rs, + Err(_) => return -1, + }; + + if no_rep { + DatasetAdd(set, sha256_string.as_ptr(), 32); + } else if let Ok(val) = v[1].to_string().parse::() { + let rep: DataRepType = DataRepType { value: val }; + DatasetAddwRep(set, sha256_string.as_ptr(), 32, &rep); + } else { + SCFatalErrorOnInit!( + "invalid datarep value {} in {}", + set_name, + filename.display() + ); + return -1; + } + 0 +} + +unsafe fn process_ipv4_set( + set: &Dataset, v: Vec<&str>, set_name: &str, filename: &Path, no_rep: bool, +) -> i32 { + let ipv4 = match Ipv4Addr::from_str(v[0]) { + Ok(a) => a, + Err(_) => { + SCFatalErrorOnInit!("invalid Ipv4 value {} in {}", set_name, filename.display()); + return -1; + } + }; + if no_rep { + DatasetAdd(set, ipv4.octets().as_ptr(), 4); + } else if let Ok(val) = v[1].to_string().parse::() { + let rep: DataRepType = DataRepType { value: val }; + DatasetAddwRep(set, ipv4.octets().as_ptr(), 4, &rep); + } else { + SCFatalErrorOnInit!( + "invalid datarep value {} in {}", + set_name, + filename.display() + ); + return -1; + } + 0 +} + +unsafe fn process_ipv6_set( + set: &Dataset, v: Vec<&str>, set_name: &str, filename: &Path, no_rep: bool, +) -> i32 { + let ipv6 = match Ipv6Addr::from_str(v[0]) { + Ok(a) => a, + Err(_) => { + SCFatalErrorOnInit!("invalid Ipv6 value {} in {}", set_name, filename.display()); + return -1; + } + }; + let mut fin_ipv6 = ipv6; + + if ipv6.to_ipv4_mapped().is_some() { + let ipv6_octets = ipv6.octets(); + let mut internal_ipv6: [u8; 16] = [0; 16]; + internal_ipv6[0] = ipv6_octets[12]; + internal_ipv6[1] = ipv6_octets[13]; + internal_ipv6[2] = ipv6_octets[14]; + internal_ipv6[3] = ipv6_octets[15]; + + // [u8; 16] is always safe to transmute to [u16; 8] + let [s0, s1, s2, s3, s4, s5, s6, s7] = + unsafe { transmute::<[u8; 16], [u16; 8]>(internal_ipv6) }; + fin_ipv6 = [ + u16::from_be(s0), + u16::from_be(s1), + u16::from_be(s2), + 
u16::from_be(s3), + u16::from_be(s4), + u16::from_be(s5), + u16::from_be(s6), + u16::from_be(s7), + ] + .into(); + } + if no_rep { + DatasetAdd(set, fin_ipv6.octets().as_ptr(), 16); + } else if let Ok(val) = v[1].to_string().parse::() { + let rep: DataRepType = DataRepType { value: val }; + DatasetAddwRep(set, fin_ipv6.octets().as_ptr(), 16, &rep); + } else { + SCFatalErrorOnInit!( + "invalid datarep value {} in {}", + set_name, + filename.display() + ); + return -1; + } + 0 +} + +fn read_or_create_file

(filename: P, fmode: &str) -> io::Result>> +where + P: AsRef, +{ + let file: File = if fmode == "r" { + File::open(filename)? + } else { + OpenOptions::new() + .append(true) + .create(true) + .read(true) + .open(filename)? + }; + Ok(io::BufReader::new(file).lines()) +} diff --git a/rust/src/detect/mod.rs b/rust/src/detect/mod.rs index c00f0dfdeb18..899950730a21 100644 --- a/rust/src/detect/mod.rs +++ b/rust/src/detect/mod.rs @@ -30,6 +30,7 @@ pub mod uint; pub mod uri; pub mod tojson; pub mod vlan; +pub mod datasets; use crate::core::AppProto; use std::os::raw::{c_int, c_void}; diff --git a/src/datasets-reputation.h b/src/datasets-reputation.h index 3483d823cd6e..18ced6803705 100644 --- a/src/datasets-reputation.h +++ b/src/datasets-reputation.h @@ -24,9 +24,7 @@ #ifndef SURICATA_DATASETS_REPUTATION_H #define SURICATA_DATASETS_REPUTATION_H -typedef struct DataRepType { - uint16_t value; -} DataRepType; +#include "rust-bindings.h" typedef struct DataRepResultType { bool found; diff --git a/src/datasets.c b/src/datasets.c index 6db46bc56703..7bda136d22a5 100644 --- a/src/datasets.c +++ b/src/datasets.c @@ -44,8 +44,7 @@ SCMutex sets_lock = SCMUTEX_INITIALIZER; static Dataset *sets = NULL; static uint32_t set_ids = 0; -static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, - DataRepType *rep); +int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep); static inline void DatasetUnlockData(THashData *d) { @@ -121,50 +120,6 @@ static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs) return 0; } -static int ParseRepLine(const char *in, size_t ins, DataRepType *rep_out) -{ - SCLogDebug("in '%s'", in); - char raw[ins + 1]; - memcpy(raw, in, ins); - raw[ins] = '\0'; - char *line = raw; - - char *ptrs[1] = {NULL}; - int idx = 0; - - size_t i = 0; - while (i < ins + 1) { - if (line[i] == ',' || line[i] == '\n' || line[i] == '\0') { - line[i] = '\0'; - SCLogDebug("line '%s'", line); - - 
ptrs[idx] = line; - idx++; - - if (idx == 1) - break; - } else { - i++; - } - } - - if (idx != 1) { - SCLogDebug("idx %d", idx); - return -1; - } - - uint16_t v = 0; - int r = StringParseU16RangeCheck(&v, 10, strlen(ptrs[0]), ptrs[0], 0, USHRT_MAX); - if (r != (int)strlen(ptrs[0])) { - SCLogError("'%s' is not a valid reputation value (0-65535)", ptrs[0]); - return -1; - } - SCLogDebug("v %"PRIu16" raw %s", v, ptrs[0]); - - rep_out->value = v; - return 0; -} - static int DatasetLoadIPv4(Dataset *set) { if (strlen(set->load) == 0) @@ -176,66 +131,15 @@ static int DatasetLoadIPv4(Dataset *set) fopen_mode = "a+"; } - FILE *fp = fopen(set->load, fopen_mode); - if (fp == NULL) { - SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv4); + if (retval == -2) { + FatalErrorOnInit("dataset %s could not be processed", set->name); + } else if (retval == -1) { return -1; } - uint32_t cnt = 0; - char line[1024]; - while (fgets(line, (int)sizeof(line), fp) != NULL) { - char *r = strchr(line, ','); - if (r == NULL) { - line[strlen(line) - 1] = '\0'; - SCLogDebug("line: '%s'", line); - - struct in_addr in; - if (inet_pton(AF_INET, line, &in) != 1) { - FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line); - continue; - } - - if (DatasetAdd(set, (const uint8_t *)&in.s_addr, 4) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; - - /* list with rep data */ - } else { - line[strlen(line) - 1] = '\0'; - SCLogDebug("IPv4 with REP line: '%s'", line); - - *r = '\0'; - - struct in_addr in; - if (inet_pton(AF_INET, line, &in) != 1) { - FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line); - continue; - } - - r++; - - DataRepType rep = { .value = 0 }; - if (ParseRepLine(r, strlen(r), &rep) < 0) { - FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); - continue; - } - - 
SCLogDebug("rep v:%u", rep.value); - if (DatasetAddwRep(set, (const uint8_t *)&in.s_addr, 4, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - - cnt++; - } - } THashConsolidateMemcap(set->hash); - fclose(fp); - SCLogConfig("dataset: %s loaded %u records", set->name, cnt); return 0; } @@ -281,68 +185,15 @@ static int DatasetLoadIPv6(Dataset *set) fopen_mode = "a+"; } - FILE *fp = fopen(set->load, fopen_mode); - if (fp == NULL) { - SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv6); + if (retval == -2) { + FatalErrorOnInit("dataset %s could not be processed", set->name); + } else if (retval == -1) { return -1; } - uint32_t cnt = 0; - char line[1024]; - while (fgets(line, (int)sizeof(line), fp) != NULL) { - char *r = strchr(line, ','); - if (r == NULL) { - line[strlen(line) - 1] = '\0'; - SCLogDebug("line: '%s'", line); - - struct in6_addr in6; - int ret = ParseIpv6String(set, line, &in6); - if (ret < 0) { - FatalErrorOnInit("unable to parse IP address"); - continue; - } - - if (DatasetAdd(set, (const uint8_t *)&in6.s6_addr, 16) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; - - /* list with rep data */ - } else { - line[strlen(line) - 1] = '\0'; - SCLogDebug("IPv6 with REP line: '%s'", line); - - *r = '\0'; - - struct in6_addr in6; - int ret = ParseIpv6String(set, line, &in6); - if (ret < 0) { - FatalErrorOnInit("unable to parse IP address"); - continue; - } - - r++; - - DataRepType rep = { .value = 0 }; - if (ParseRepLine(r, strlen(r), &rep) < 0) { - FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); - continue; - } - - SCLogDebug("rep v:%u", rep.value); - if (DatasetAddwRep(set, (const uint8_t *)&in6.s6_addr, 16, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - - cnt++; - } - } 
THashConsolidateMemcap(set->hash); - fclose(fp); - SCLogConfig("dataset: %s loaded %u records", set->name, cnt); return 0; } @@ -357,66 +208,15 @@ static int DatasetLoadMd5(Dataset *set) fopen_mode = "a+"; } - FILE *fp = fopen(set->load, fopen_mode); - if (fp == NULL) { - SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSMd5); + if (retval == -2) { + FatalErrorOnInit("dataset %s could not be processed", set->name); + } else if (retval == -1) { return -1; } - uint32_t cnt = 0; - char line[1024]; - while (fgets(line, (int)sizeof(line), fp) != NULL) { - /* straight black/white list */ - if (strlen(line) == 33) { - line[strlen(line) - 1] = '\0'; - SCLogDebug("line: '%s'", line); - - uint8_t hash[16]; - if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) { - FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load); - continue; - } - - if (DatasetAdd(set, (const uint8_t *)hash, 16) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; - - /* list with rep data */ - } else if (strlen(line) > 33 && line[32] == ',') { - line[strlen(line) - 1] = '\0'; - SCLogDebug("MD5 with REP line: '%s'", line); - - uint8_t hash[16]; - if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) { - FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load); - continue; - } - - DataRepType rep = { .value = 0}; - if (ParseRepLine(line + 33, strlen(line) - 33, &rep) < 0) { - FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); - continue; - } - - SCLogDebug("rep v:%u", rep.value); - if (DatasetAddwRep(set, hash, 16, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - - cnt++; - } - else { - FatalErrorOnInit("MD5 bad line len %u: '%s'", (uint32_t)strlen(line), line); - continue; - } - } THashConsolidateMemcap(set->hash); - fclose(fp); - 
SCLogConfig("dataset: %s loaded %u records", set->name, cnt); return 0; } @@ -431,62 +231,15 @@ static int DatasetLoadSha256(Dataset *set) fopen_mode = "a+"; } - FILE *fp = fopen(set->load, fopen_mode); - if (fp == NULL) { - SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSSha256); + if (retval == -2) { + FatalErrorOnInit("dataset %s could not be processed", set->name); + } else if (retval == -1) { return -1; } - uint32_t cnt = 0; - char line[1024]; - while (fgets(line, (int)sizeof(line), fp) != NULL) { - /* straight black/white list */ - if (strlen(line) == 65) { - line[strlen(line) - 1] = '\0'; - SCLogDebug("line: '%s'", line); - - uint8_t hash[32]; - if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0) { - FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load); - continue; - } - - if (DatasetAdd(set, (const uint8_t *)hash, (uint32_t)32) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; - - /* list with rep data */ - } else if (strlen(line) > 65 && line[64] == ',') { - line[strlen(line) - 1] = '\0'; - SCLogDebug("SHA-256 with REP line: '%s'", line); - - uint8_t hash[32]; - if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0) { - FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load); - continue; - } - - DataRepType rep = { .value = 0 }; - if (ParseRepLine(line + 65, strlen(line) - 65, &rep) < 0) { - FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load); - continue; - } - - SCLogDebug("rep %u", rep.value); - - if (DatasetAddwRep(set, hash, 32, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; - } - } THashConsolidateMemcap(set->hash); - fclose(fp); - SCLogConfig("dataset: %s loaded %u records", set->name, cnt); return 0; } @@ -496,80 +249,21 @@ static int DatasetLoadString(Dataset *set) return 0; 
SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + const char *fopen_mode = "r"; if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) { fopen_mode = "a+"; } - FILE *fp = fopen(set->load, fopen_mode); - if (fp == NULL) { - SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSString); + if (retval == -2) { + FatalErrorOnInit("dataset %s could not be processed", set->name); + } else if (retval == -1) { return -1; } - uint32_t cnt = 0; - char line[1024]; - while (fgets(line, (int)sizeof(line), fp) != NULL) { - if (strlen(line) <= 1) - continue; - - char *r = strchr(line, ','); - if (r == NULL) { - line[strlen(line) - 1] = '\0'; - SCLogDebug("line: '%s'", line); - uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(line)); - // coverity[alloc_strlen : FALSE] - uint8_t decoded[decoded_size]; - uint32_t num_decoded = SCBase64Decode( - (const uint8_t *)line, strlen(line), SCBase64ModeStrict, decoded); - if (num_decoded == 0 && strlen(line) > 0) { - FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load); - continue; - } - - if (DatasetAdd(set, (const uint8_t *)decoded, num_decoded) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; - } else { - line[strlen(line) - 1] = '\0'; - SCLogDebug("line: '%s'", line); - - *r = '\0'; - - uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(line)); - uint8_t decoded[decoded_size]; - uint32_t num_decoded = SCBase64Decode( - (const uint8_t *)line, strlen(line), SCBase64ModeStrict, decoded); - if (num_decoded == 0) { - FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load); - continue; - } - - r++; - SCLogDebug("r '%s'", r); - - DataRepType rep = { .value = 0 }; - if (ParseRepLine(r, strlen(r), &rep) < 0) { - FatalErrorOnInit("die: bad rep"); - continue; - } - SCLogDebug("rep %u", rep.value); - - if (DatasetAddwRep(set, (const uint8_t 
*)decoded, num_decoded, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; - - SCLogDebug("line with rep %s, %s", line, r); - } - } THashConsolidateMemcap(set->hash); - fclose(fp); - SCLogConfig("dataset: %s loaded %u records", set->name, cnt); return 0; } @@ -1572,8 +1266,7 @@ int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len) return -1; } -static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, - DataRepType *rep) +int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep) { if (set == NULL) return -1; diff --git a/src/datasets.h b/src/datasets.h index 86bfed02b22f..1abfa889baa6 100644 --- a/src/datasets.h +++ b/src/datasets.h @@ -19,6 +19,7 @@ #define SURICATA_DATASETS_H #include "util-thash.h" +#include "rust.h" #include "datasets-reputation.h" int DatasetsInit(void);