From a93be0357b5f41344353b60fd3fe00c7bd688411 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 14:17:04 -0700 Subject: [PATCH 01/44] feat: Expected log levels --- crates/compass/src/scraper/log.rs | 133 ++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 crates/compass/src/scraper/log.rs diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs new file mode 100644 index 000000000..9182f93d3 --- /dev/null +++ b/crates/compass/src/scraper/log.rs @@ -0,0 +1,133 @@ +//! Runtime logs +//! +//! Parse and record the logs emitted by the runtime to support +//! pos-processing and analysis. + +#[derive(Debug, serde::Deserialize)] +enum LogLevel { + #[serde(rename = "TRACE")] + Trace, + #[serde(rename = "DEBUG")] + Debug, + #[serde(rename = "INFO")] + Info, + #[serde(rename = "WARNING")] + Warning, + #[serde(rename = "ERROR")] + Error, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json; + + #[test] + fn test_deserialize_trace() { + let json = r#""TRACE""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Trace)); + } + + #[test] + fn test_deserialize_debug() { + let json = r#""DEBUG""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Debug)); + } + + #[test] + fn test_deserialize_info() { + let json = r#""INFO""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Info)); + } + + #[test] + fn test_deserialize_warning() { + let json = r#""WARNING""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Warning)); + } + + #[test] + fn test_deserialize_error() { + let json = r#""ERROR""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Error)); + } + + #[test] + fn test_deserialize_invalid_variant() { + let json = r#""INVALID""#; + let result: Result = serde_json::from_str(json); + assert!(result.is_err()); + } + + #[test] + fn test_deserialize_lowercase_fails() { + let json = r#""trace""#; + let result: Result = serde_json::from_str(json); + assert!(result.is_err()); + } + + #[test] + fn test_deserialize_mixed_case_fails() { + let json = r#""Trace""#; + let result: Result = serde_json::from_str(json); + assert!(result.is_err()); + } + + #[test] + fn test_deserialize_in_struct() { + #[derive(serde::Deserialize)] + struct LogEntry { + level: LogLevel, + message: String, + } + + let json = r#"{"level": "ERROR", "message": "Something went wrong"}"#; + let entry: LogEntry = serde_json::from_str(json).unwrap(); + assert!(matches!(entry.level, LogLevel::Error)); + assert_eq!(entry.message, "Something went wrong"); + } + + #[test] + fn test_deserialize_array_of_levels() { + let json = r#"["TRACE", "INFO", "ERROR"]"#; + let levels: Vec = serde_json::from_str(json).unwrap(); + assert_eq!(levels.len(), 3); + assert!(matches!(levels[0], LogLevel::Trace)); + assert!(matches!(levels[1], LogLevel::Info)); + assert!(matches!(levels[2], LogLevel::Error)); + } + + #[test] + fn test_deserialize_with_whitespace() { + let json = r#" "INFO" "#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Info)); + } + + #[test] + fn test_error_message_contains_valid_options() { + let json = r#""FATAL""#; + let result: Result = serde_json::from_str(json); + + match result { + Err(e) => { + let error_msg = e.to_string(); + // The error message should mention valid variants + assert!(error_msg.contains("unknown variant")); + } + Ok(_) => panic!("Expected deserialization to fail"), + } + } +} + +struct LogRecord { + timestamp: String, + level: LogLevel, + subject: String, + message: String, +} From b33d807cefe988aee1775a65b21ade22ca42c789 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 14:35:14 -0700 Subject: [PATCH 02/44] cfg: Adding dependency on chrono --- Cargo.lock | 2 ++ Cargo.toml | 1 + crates/compass/Cargo.toml | 1 + 3 files changed, 4 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 2f57e0474..4df621c9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -414,6 +414,7 @@ checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ "iana-time-zone", "num-traits", + "serde", "windows-link", ] @@ -777,6 +778,7 @@ dependencies = [ name = "infra-compass-db" version = "0.0.8" dependencies = [ + "chrono", "csv", "duckdb", "serde", diff --git a/Cargo.toml b/Cargo.toml index 8ba59b19c..ea1b76376 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ keywords = ["NLR", "database", "energy", "ordinance"] infra-compass-db = { version = "0.0.8", path = "crates/compass" } anyhow = { version = "1.0.98" } clap = { version = "4.5.40", features = ["cargo"] } +chrono = { version = "0.4.42", default-features = false, features = ["serde"] } csv = { version = "1.3.1" } duckdb = { version = "1.4.0", features = ["bundled"] } serde = { version = "1.0.219", features = ["derive"] } diff --git a/crates/compass/Cargo.toml b/crates/compass/Cargo.toml index 319832ad9..e798aba83 100644 --- a/crates/compass/Cargo.toml +++ b/crates/compass/Cargo.toml @@ -12,6 +12,7 @@ categories.workspace = true keywords.workspace = true [dependencies] +chrono = { workspace = true } csv = { workspace = true } duckdb.workspace = true serde.workspace = true From 653b69544c62ee71cb72ea937ba4c9b875286a0b Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 18:43:07 -0700 Subject: [PATCH 03/44] feat: LogRecord.parse() --- crates/compass/src/scraper/log.rs | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index 9182f93d3..feacddefa 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -131,3 +131,47 @@ struct LogRecord { subject: String, message: String, } + +impl LogRecord { + fn parse(line: &str) -> Result { + // Regex pattern: [timestamp] LEVEL - subject: message + let re = Regex::new(r"^\[([^\]]+)\]\s+(\w+)\s+-\s+([^:]+):\s+(.+)$").unwrap(); + + let caps = re.captures(line).ok_or_else(|| { + crate::error::Error::Undefined(format!("Failed to parse log line: {}", line)) + })?; + + let timestamp_str = caps.get(1).unwrap().as_str().to_string(); + let timestamp = NaiveDateTime::parse_from_str(×tamp_str, "%Y-%m-%d %H:%M:%S,%3f") + .map_err(|e| { + crate::error::Error::Undefined(format!( + "Failed to parse timestamp '{}': {}", + timestamp_str, e + )) + })?; + + let level_str = caps.get(2).unwrap().as_str(); + // Parse the log level + let level = serde_json::from_str(&format!(r#""{}""#, level_str)) + .map_err(|e| format!("Invalid log level '{}': {}", level_str, e)) + .unwrap(); + + let subject = caps.get(3).unwrap().as_str().to_string(); + let message = caps.get(4).unwrap().as_str().to_string(); + + Ok(LogRecord { + timestamp, + level, + subject, + message, + }) + } + + fn parse_lines(input: &str) -> Result> { + input + .lines() + .filter(|line| !line.trim().is_empty()) + .map(Self::parse) + .collect() + } +} From f91e5f1298b63de0bd1c7d5590e5a24f510459f3 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 18:44:03 -0700 Subject: [PATCH 04/44] test: LogRecord::parse --- crates/compass/src/scraper/log.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index feacddefa..f117cf106 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -175,3 +175,27 @@ impl LogRecord { .collect() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_single_line() { + let line = "[2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS"; + let record = LogRecord::parse(line).unwrap(); + + assert_eq!( + record.timestamp.date(), + chrono::NaiveDate::from_ymd_opt(2025, 12, 6).unwrap() + ); + assert_eq!( + record.timestamp.time(), + chrono::NaiveTime::from_hms_milli_opt(15, 15, 14, 272).unwrap() + ); + + assert!(matches!(record.level, LogLevel::Info)); + assert_eq!(record.subject, "Task-1"); + assert_eq!(record.message, "Running COMPASS"); + } +} From ba5a6a2b1237c59783261f2db4a033d8ea0643cd Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 18:44:45 -0700 Subject: [PATCH 05/44] feat: LogRecord::init_db --- crates/compass/src/scraper/log.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index f117cf106..801a71e14 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -199,3 +199,21 @@ mod tests { assert_eq!(record.message, "Running COMPASS"); } } + +impl LogRecord { + fn init_db(conn: &duckdb::Transaction) -> Result<()> { + conn.execute_batch( + r" + CREATE SEQUENCE IF NOT EXISTS scrapper_log_seq START 1; + CREATE TABLE IF NOT EXISTS logs ( + id INTEGER PRIMARY KEY DEFAULT NEXTVAL('scrapper_log_seq'), + timestamp TIMESTAMP, + level VARCHAR, + subject VARCHAR, + message VARCHAR + ); + ", + )?; + Ok(()) + } +} From b1fb6c605d847041c65cef8b8e333d2ad742fbb6 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 18:48:52 -0700 Subject: [PATCH 06/44] feat: LogRecord::init_db --- crates/compass/src/scraper/log.rs | 7 ++++--- crates/compass/src/scraper/mod.rs | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index 801a71e14..b29e0bdc7 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -125,8 +125,9 @@ mod tests { } } -struct LogRecord { - timestamp: String, +#[derive(Debug)] +pub(super) struct LogRecord { + timestamp: NaiveDateTime, level: LogLevel, subject: String, message: String, @@ -201,7 +202,7 @@ mod tests { } impl LogRecord { - fn init_db(conn: &duckdb::Transaction) -> Result<()> { + pub(super) fn init_db(conn: &duckdb::Transaction) -> Result<()> { conn.execute_batch( r" CREATE SEQUENCE IF NOT EXISTS scrapper_log_seq START 1; diff --git a/crates/compass/src/scraper/mod.rs b/crates/compass/src/scraper/mod.rs index 71eefc2b7..2dbc88ec7 100644 --- a/crates/compass/src/scraper/mod.rs +++ b/crates/compass/src/scraper/mod.rs @@ -1,5 +1,6 @@ //! Support for the ordinance scraper output +mod log; mod metadata; mod ordinance; mod source; @@ -88,6 +89,7 @@ impl ScrapedOrdinance { pub(super) fn init_db(conn: &duckdb::Transaction) -> Result<()> { debug!("Initializing ScrapedOrdinance database"); + log::LogRecord::init_db(conn)?; source::Source::init_db(conn)?; metadata::Metadata::init_db(conn)?; usage::Usage::init_db(conn)?; From 10e029f84c1127a85152f07764af6e131c16d7d6 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 18:49:31 -0700 Subject: [PATCH 07/44] Importing requirements --- crates/compass/src/scraper/log.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index b29e0bdc7..f29e3a5dc 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -3,7 +3,12 @@ //! Parse and record the logs emitted by the runtime to support //! pos-processing and analysis. -#[derive(Debug, serde::Deserialize)] +use chrono::NaiveDateTime; +use regex::Regex; + +use crate::error::Result; + +#[derive(Debug, PartialEq, serde::Deserialize)] enum LogLevel { #[serde(rename = "TRACE")] Trace, From fb0c726ceb625498d7f5210e475bd63fe33524e7 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 18:50:23 -0700 Subject: [PATCH 08/44] renaming tests --- crates/compass/src/scraper/log.rs | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index f29e3a5dc..d491f2412 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -23,68 +23,68 @@ enum LogLevel { } #[cfg(test)] -mod tests { +mod test_loglevel { use super::*; use serde_json; #[test] - fn test_deserialize_trace() { + fn deserialize_trace() { let json = r#""TRACE""#; let level: LogLevel = serde_json::from_str(json).unwrap(); assert!(matches!(level, LogLevel::Trace)); } #[test] - fn test_deserialize_debug() { + fn deserialize_debug() { let json = r#""DEBUG""#; let level: LogLevel = serde_json::from_str(json).unwrap(); assert!(matches!(level, LogLevel::Debug)); } #[test] - fn test_deserialize_info() { + fn deserialize_info() { let json = r#""INFO""#; let level: LogLevel = serde_json::from_str(json).unwrap(); assert!(matches!(level, LogLevel::Info)); } #[test] - fn test_deserialize_warning() { + fn deserialize_warning() { let json = r#""WARNING""#; let level: LogLevel = serde_json::from_str(json).unwrap(); assert!(matches!(level, LogLevel::Warning)); } #[test] - fn test_deserialize_error() { + fn deserialize_error() { let json = r#""ERROR""#; let level: LogLevel = serde_json::from_str(json).unwrap(); assert!(matches!(level, LogLevel::Error)); } #[test] - fn test_deserialize_invalid_variant() { + fn deserialize_invalid_variant() { let json = r#""INVALID""#; - let result: Result = serde_json::from_str(json); + let result: std::result::Result = serde_json::from_str(json); assert!(result.is_err()); } #[test] - fn test_deserialize_lowercase_fails() { + fn deserialize_lowercase_fails() { let json = r#""trace""#; - let result: Result = serde_json::from_str(json); + let result: std::result::Result = serde_json::from_str(json); assert!(result.is_err()); } #[test] - fn test_deserialize_mixed_case_fails() { + fn deserialize_mixed_case_fails() { let json = r#""Trace""#; - let result: Result = serde_json::from_str(json); + let result: std::result::Result = serde_json::from_str(json); assert!(result.is_err()); } #[test] - fn test_deserialize_in_struct() { + fn deserialize_in_struct() { #[derive(serde::Deserialize)] struct LogEntry { level: LogLevel, @@ -98,7 +98,7 @@ mod tests { } #[test] - fn test_deserialize_array_of_levels() { + fn deserialize_array_of_levels() { let json = r#"["TRACE", "INFO", "ERROR"]"#; let levels: Vec = serde_json::from_str(json).unwrap(); assert_eq!(levels.len(), 3); @@ -108,16 +108,16 @@ mod tests { } #[test] - fn test_deserialize_with_whitespace() { + fn deserialize_with_whitespace() { let json = r#" "INFO" "#; let level: LogLevel = serde_json::from_str(json).unwrap(); assert!(matches!(level, LogLevel::Info)); } #[test] - fn test_error_message_contains_valid_options() { + fn error_message_contains_valid_options() { let json = r#""FATAL""#; - let result: Result = serde_json::from_str(json); + let result: std::result::Result = serde_json::from_str(json); match result { Err(e) => { From c494e9e897a9ccd823253e6094c1d6f42146b6ff Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 18:50:48 -0700 Subject: [PATCH 09/44] Transparent Regex errors --- crates/compass/src/error.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/compass/src/error.rs b/crates/compass/src/error.rs index 560157106..4c3d9a402 100644 --- a/crates/compass/src/error.rs +++ b/crates/compass/src/error.rs @@ -12,6 +12,9 @@ pub enum Error { #[error(transparent)] DuckDB(#[from] duckdb::Error), + #[error(transparent)] + Regex(#[from] regex::Error), + #[allow(dead_code)] #[error("Undefined error")] // Used during development while it is not clear a category of error From 818aa1f797e4e1e7d7c830da3cf40ade17a15b12 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 18:51:13 -0700 Subject: [PATCH 10/44] cfg: Adding Regex as dependency --- Cargo.lock | 1 + Cargo.toml | 1 + crates/compass/Cargo.toml | 1 + 3 files changed, 3 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 4df621c9f..77f99f128 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -781,6 +781,7 @@ dependencies = [ "chrono", "csv", "duckdb", + "regex", "serde", "serde_json", "sha2", diff --git a/Cargo.toml b/Cargo.toml index ea1b76376..6149cd563 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ clap = { version = "4.5.40", features = ["cargo"] } chrono = { version = "0.4.42", default-features = false, features = ["serde"] } csv = { version = "1.3.1" } duckdb = { version = "1.4.0", features = ["bundled"] } +regex = { version = "1.12.2" } serde = { version = "1.0.219", features = ["derive"] } serde_json = { version = "1.0.140" } sha2 = { version = "0.10.8" } diff --git a/crates/compass/Cargo.toml b/crates/compass/Cargo.toml index e798aba83..b1b7dcef4 100644 --- a/crates/compass/Cargo.toml +++ b/crates/compass/Cargo.toml @@ -15,6 +15,7 @@ keywords.workspace = true chrono = { workspace = true } csv = { workspace = true } duckdb.workspace = true +regex = { workspace = true } serde.workspace = true serde_json.workspace = true sha2 = { workspace = true } From 5d2d4f5913aaaa8fa11e32cfe052f10a3305d22b Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 19:51:10 -0700 Subject: [PATCH 11/44] refact: Trim logs before processing --- crates/compass/src/scraper/log.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index d491f2412..e72023e17 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -176,7 +176,8 @@ impl LogRecord { fn parse_lines(input: &str) -> Result> { input .lines() - .filter(|line| !line.trim().is_empty()) + .map(|line| line.trim()) + .filter(|line| !line.is_empty()) .map(Self::parse) .collect() } From 18d5ae34c8d77a65498c2f5a4486783a051aa2e4 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 19:52:01 -0700 Subject: [PATCH 12/44] feat: LogRecord::open() --- crates/compass/src/scraper/log.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index e72023e17..2ee2f4484 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -223,4 +223,12 @@ impl LogRecord { )?; Ok(()) } + + pub(super) async fn open>(root: P) -> Result> { + let path = root.as_ref().join("logs").join("all.log"); + dbg!(&path); + let content = tokio::fs::read_to_string(path).await?; + let records = Self::parse_lines(&content)?; + Ok(records) + } } From 1a8b397af3efb8efbccad028b4c70ee7e4fd8a22 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 19:52:37 -0700 Subject: [PATCH 13/44] feat: LogRecord::sample() --- crates/compass/src/scraper/log.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index 2ee2f4484..dc803369f 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -232,3 +232,26 @@ impl LogRecord { Ok(records) } } + +#[cfg(test)] +/// Samples of runtime logs for testing purposes +pub(crate) mod sample { + use crate::error::Result; + use std::io::Write; + + pub(crate) fn as_text_v1() -> String { + r#"[2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS version 0.11.3.dev8+g69a75b7.d20251111 + [2025-12-06 15:15:14,872] INFO - Task-1: Processing 250 jurisdiction(s) + [2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS + [2025-12-06 19:48:10,503] INFO - Task-1: Total runtime: 4:32:55 + "# + .to_string() + } + + pub(crate) fn as_file>(path: P) -> Result { + let mut file = std::fs::File::create(path).unwrap(); + dbg!(&file); + writeln!(file, "{}", as_text_v1()).unwrap(); + Ok(file) + } +} From 4700231c0c0188b9a495298ced6f1b5ffb4be1f7 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 20:22:13 -0700 Subject: [PATCH 14/44] test: Adding a sample with a county --- crates/compass/src/scraper/log.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index dc803369f..814fba150 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -243,6 +243,7 @@ pub(crate) mod sample { r#"[2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS version 0.11.3.dev8+g69a75b7.d20251111 [2025-12-06 15:15:14,872] INFO - Task-1: Processing 250 jurisdiction(s) [2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS + [2025-12-06 15:15:14,572] INFO - Jefferson County, Colorado: Running COMPASS [2025-12-06 19:48:10,503] INFO - Task-1: Total runtime: 4:32:55 "# .to_string() From 926f20f652846f55047570a02275365577f6e334 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 20:23:40 -0700 Subject: [PATCH 15/44] feat: Include logs when opening --- crates/compass/src/scraper/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/compass/src/scraper/mod.rs b/crates/compass/src/scraper/mod.rs index 2dbc88ec7..1de2a3fcb 100644 --- a/crates/compass/src/scraper/mod.rs +++ b/crates/compass/src/scraper/mod.rs @@ -114,11 +114,12 @@ impl ScrapedOrdinance { return Err(error::Error::Undefined("Path does not exist".to_string())); } - let (source, metadata, usage, ordinance) = tokio::try_join!( + let (source, metadata, usage, ordinance, logs) = tokio::try_join!( source::Source::open(&root), metadata::Metadata::open(&root), usage::Usage::open(&root), - ordinance::Ordinance::open(&root) + ordinance::Ordinance::open(&root), + log::LogRecord::open(&root), )?; trace!("Scraped ordinance opened successfully"); @@ -129,6 +130,7 @@ impl ScrapedOrdinance { source, usage, ordinance, + logs, }) } From 16f53275b2c64345314d03ce78d0e033548e74e0 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 20:24:42 -0700 Subject: [PATCH 16/44] test: Extending open_scraped_ordinaces to include logs --- crates/compass/src/scraper/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/compass/src/scraper/mod.rs b/crates/compass/src/scraper/mod.rs index 1de2a3fcb..4c5c8e9ba 100644 --- a/crates/compass/src/scraper/mod.rs +++ b/crates/compass/src/scraper/mod.rs @@ -207,6 +207,8 @@ mod tests { let _metadata_file = metadata::sample::as_file(target.path().join("meta.json")).unwrap(); let _usage_file = usage::sample::as_file(target.path().join("usage.json")).unwrap(); + std::fs::create_dir(&target.path().join("logs")).unwrap(); + let _log_file = log::sample::as_file(target.path().join("logs").join("all.log")).unwrap(); ordinance::sample::as_file(target.path()).unwrap(); let demo = ScrapedOrdinance::open(target).await.unwrap(); From bb9e044edd8ffed92f7b77e3e719dff53f7bb8a5 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 20:40:58 -0700 Subject: [PATCH 17/44] feat: RuntimeLogs as a collection of LogRecord --- crates/compass/src/scraper/log.rs | 28 ++++++++++++++++------------ crates/compass/src/scraper/mod.rs | 7 +++++-- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index 814fba150..84b0c3b5e 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -172,15 +172,6 @@ impl LogRecord { message, }) } - - fn parse_lines(input: &str) -> Result> { - input - .lines() - .map(|line| line.trim()) - .filter(|line| !line.is_empty()) - .map(Self::parse) - .collect() - } } #[cfg(test)] @@ -207,7 +198,20 @@ mod tests { } } -impl LogRecord { +#[derive(Debug)] +pub(super) struct RuntimeLogs(Vec); + +impl RuntimeLogs { + fn parse(input: &str) -> Result { + let records: Vec = input + .lines() + .map(|line| line.trim()) + .filter(|line| !line.is_empty()) + .map(LogRecord::parse) + .collect::>>()?; + Ok(RuntimeLogs(records)) + } + pub(super) fn init_db(conn: &duckdb::Transaction) -> Result<()> { conn.execute_batch( r" @@ -224,11 +228,11 @@ impl LogRecord { Ok(()) } - pub(super) async fn open>(root: P) -> Result> { + pub(super) async fn open>(root: P) -> Result { let path = root.as_ref().join("logs").join("all.log"); dbg!(&path); let content = tokio::fs::read_to_string(path).await?; - let records = Self::parse_lines(&content)?; + let records = Self::parse(&content)?; Ok(records) } } diff --git a/crates/compass/src/scraper/mod.rs b/crates/compass/src/scraper/mod.rs index 4c5c8e9ba..b29e4b418 100644 --- a/crates/compass/src/scraper/mod.rs +++ b/crates/compass/src/scraper/mod.rs @@ -12,6 +12,7 @@ use tracing::{self, debug, trace}; use crate::error; use crate::error::Result; +use log::RuntimeLogs; use metadata::Metadata; use ordinance::Ordinance; #[allow(unused_imports)] @@ -74,6 +75,8 @@ pub(crate) struct ScrapedOrdinance { usage: Usage, /// The ordinance section ordinance: Ordinance, + /// The runtime logs + logs: RuntimeLogs, } impl ScrapedOrdinance { @@ -89,7 +92,7 @@ impl ScrapedOrdinance { pub(super) fn init_db(conn: &duckdb::Transaction) -> Result<()> { debug!("Initializing ScrapedOrdinance database"); - log::LogRecord::init_db(conn)?; + log::RuntimeLogs::init_db(conn)?; source::Source::init_db(conn)?; metadata::Metadata::init_db(conn)?; usage::Usage::init_db(conn)?; @@ -119,7 +122,7 @@ impl ScrapedOrdinance { metadata::Metadata::open(&root), usage::Usage::open(&root), ordinance::Ordinance::open(&root), - log::LogRecord::open(&root), + log::RuntimeLogs::open(&root), )?; trace!("Scraped ordinance opened successfully"); From d2e53e4a5df2858f438aa3dda37115fc05e1fcc6 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 20:41:42 -0700 Subject: [PATCH 18/44] test, refact: Import super::* --- crates/compass/src/scraper/mod.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/compass/src/scraper/mod.rs b/crates/compass/src/scraper/mod.rs index b29e4b418..a711a8600 100644 --- a/crates/compass/src/scraper/mod.rs +++ b/crates/compass/src/scraper/mod.rs @@ -176,11 +176,7 @@ impl ScrapedOrdinance { #[cfg(test)] mod tests { - use super::ScrapedOrdinance; - use super::metadata; - use super::ordinance; - use super::source; - use super::usage; + use super::*; use std::io::Write; #[tokio::test] From bfde5e5a4ae479de9120de3d9fbda78fd490f7f4 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 21:15:35 -0700 Subject: [PATCH 19/44] cfg: Importing duckdb and trace --- crates/compass/src/scraper/log.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index 84b0c3b5e..b6ec813a4 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -4,7 +4,9 @@ //! pos-processing and analysis. use chrono::NaiveDateTime; +use duckdb; use regex::Regex; +use tracing::{debug, trace}; use crate::error::Result; From f480e38bdde6522b8c123b0a35307df4ccdd977d Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 21:16:04 -0700 Subject: [PATCH 20/44] feat: LogRecord::record() --- crates/compass/src/scraper/log.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index b6ec813a4..38d3527a1 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -174,6 +174,21 @@ impl LogRecord { message, }) } + + fn record(&self, conn: &duckdb::Transaction, bookkeeper_id: usize) -> Result<()> { + trace!("Recording log record: {:?}", self); + conn.execute( + "INSERT INTO logs (bookkeeper_lnk, timestamp, level, subject, message) VALUES (?, ?, ?, ?, ?)", + duckdb::params![ + bookkeeper_id, + self.timestamp.format("%Y-%m-%d %H:%M:%S").to_string(), + format!("{:?}", self.level), + &self.subject, + &self.message, + ], + )?; + Ok(()) + } } #[cfg(test)] From 49e6dfba21ee78e13dff97366c63f04826e71bb7 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 11 Dec 2025 21:16:23 -0700 Subject: [PATCH 21/44] feat: Logs record in DB must have bookkeeper id --- crates/compass/src/scraper/log.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index 38d3527a1..bb1d52030 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -235,6 +235,7 @@ impl RuntimeLogs { CREATE SEQUENCE IF NOT EXISTS scrapper_log_seq START 1; CREATE TABLE IF NOT EXISTS logs ( id INTEGER PRIMARY KEY DEFAULT NEXTVAL('scrapper_log_seq'), + bookkeeper_lnk INTEGER REFERENCES bookkeeper(id) NOT NULL, timestamp TIMESTAMP, level VARCHAR, subject VARCHAR, From 6d13d20cffebd1be19a7798cd8e8ab6dcc00081c Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 07:58:49 -0700 Subject: [PATCH 22/44] style: --- crates/compass/src/scraper/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/mod.rs b/crates/compass/src/scraper/mod.rs index a711a8600..15a047bb4 100644 --- a/crates/compass/src/scraper/mod.rs +++ b/crates/compass/src/scraper/mod.rs @@ -206,7 +206,7 @@ mod tests { let _metadata_file = metadata::sample::as_file(target.path().join("meta.json")).unwrap(); let _usage_file = usage::sample::as_file(target.path().join("usage.json")).unwrap(); - std::fs::create_dir(&target.path().join("logs")).unwrap(); + std::fs::create_dir(target.path().join("logs")).unwrap(); let _log_file = log::sample::as_file(target.path().join("logs").join("all.log")).unwrap(); ordinance::sample::as_file(target.path()).unwrap(); From a30d0760b46e2adbfac040cd30c47237b8460130 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 10:36:02 -0700 Subject: [PATCH 23/44] feat: Including DebugToFile A custom log level from Python library. --- crates/compass/src/scraper/log.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index bb1d52030..c6115b35f 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -12,6 +12,8 @@ use crate::error::Result; #[derive(Debug, PartialEq, serde::Deserialize)] enum LogLevel { + #[serde(rename = "DEBUG_TO_FILE")] + DebugToFile, #[serde(rename = "TRACE")] Trace, #[serde(rename = "DEBUG")] From 52b51876981b751678721b7862afc0f06f086642 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 10:39:04 -0700 Subject: [PATCH 24/44] refact: Compile regex only once Major performance improvement. I forgot to lock the regex expression, so it was recompiling it every time. --- crates/compass/src/scraper/log.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index c6115b35f..b70a30748 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -142,12 +142,15 @@ pub(super) struct LogRecord { message: String, } +use std::sync::LazyLock; + impl LogRecord { fn parse(line: &str) -> Result { // Regex pattern: [timestamp] LEVEL - subject: message - let re = Regex::new(r"^\[([^\]]+)\]\s+(\w+)\s+-\s+([^:]+):\s+(.+)$").unwrap(); + static RE: LazyLock = + LazyLock::new(|| Regex::new(r"^\[([^\]]+)\]\s+(\w+)\s+-\s+([^:]+):\s+(.+)$").unwrap()); - let caps = re.captures(line).ok_or_else(|| { + let caps = RE.captures(line).ok_or_else(|| { crate::error::Error::Undefined(format!("Failed to parse log line: {}", line)) })?; From 55333a559837785f6cc0134ff4ec921498f7c084 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 11:12:32 -0700 Subject: [PATCH 25/44] fix: Removing extra spaces from sample --- crates/compass/src/scraper/log.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index b70a30748..4f53164d7 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -267,11 +267,12 @@ pub(crate) mod sample { use std::io::Write; pub(crate) fn as_text_v1() -> String { - r#"[2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS version 0.11.3.dev8+g69a75b7.d20251111 - [2025-12-06 15:15:14,872] INFO - Task-1: Processing 250 jurisdiction(s) - [2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS - [2025-12-06 15:15:14,572] INFO - Jefferson County, Colorado: Running COMPASS - [2025-12-06 19:48:10,503] INFO - Task-1: Total runtime: 4:32:55 + r#" +[2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS version 0.11.3.dev8+g69a75b7.d20251111 +[2025-12-06 15:15:14,872] INFO - Task-1: Processing 250 jurisdiction(s) +[2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS +[2025-12-06 15:15:14,572] INFO - Jefferson County, Colorado: Running COMPASS +[2025-12-06 19:48:10,503] INFO - Task-1: Total runtime: 4:32:55 "# .to_string() } From da4de66c105653236c0f7cc480639961fd07f794 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 11:13:11 -0700 Subject: [PATCH 26/44] doc: RuntimeLogs::parse() --- crates/compass/src/scraper/log.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index 4f53164d7..5d503558d 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -224,6 +224,15 @@ mod tests { pub(super) struct RuntimeLogs(Vec); impl RuntimeLogs { + /// Parse runtime logs from text input + /// + /// # Notes + /// - For now, hardcoded to only keep INFO, WARNING, and ERROR levels. + /// These logs are quite long with the more verbose levels. Since I + /// collect everything, it is better to filter and reduce it here. + /// In the future, consider returning an iterator instead. + /// - Ignore mulltiple lines messages. + /// - Lines that fail to parse are skipped with a warning. fn parse(input: &str) -> Result { let records: Vec = input .lines() From f79cdb22ba46147f5d36658e74087dbf580b2385 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 11:13:58 -0700 Subject: [PATCH 27/44] refact: Filtering and logging RuntimeLogs::parse --- crates/compass/src/scraper/log.rs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index 5d503558d..ee01bf40b 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -236,10 +236,27 @@ impl RuntimeLogs { fn parse(input: &str) -> Result { let records: Vec = input .lines() - .map(|line| line.trim()) - .filter(|line| !line.is_empty()) - .map(LogRecord::parse) - .collect::>>()?; + .filter(|line| !line.trim().is_empty()) + .filter_map(|line| match LogRecord::parse(line) { + Ok(record) => { + trace!("Parsed log line: {}", line); + Some(record) + } + Err(e) => { + trace!("Failed to parse log line: {}. Error: {}", line, e); + None + } + }) + .filter(|record| { + (record.level == LogLevel::Info) + || (record.level == LogLevel::Warning) + || (record.level == LogLevel::Error) + }) + .map(|record| { + debug!("Keeping log record: {:?}", record); + record + }) + .collect(); Ok(RuntimeLogs(records)) } From 6d8bf27b0c010ad34652d8f6657015a549a78f1e Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 11:31:26 -0700 Subject: [PATCH 28/44] log: Bump recording on database message to info level --- crates/compass/src/scraper/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/mod.rs b/crates/compass/src/scraper/mod.rs index 15a047bb4..82a3c26a6 100644 --- a/crates/compass/src/scraper/mod.rs +++ b/crates/compass/src/scraper/mod.rs @@ -140,7 +140,7 @@ impl ScrapedOrdinance { #[allow(dead_code)] pub(crate) async fn push(&self, conn: &mut duckdb::Connection, commit_id: usize) -> Result<()> { // Load the ordinance into the database - tracing::trace!("Pushing scraped ordinance into the database"); + tracing::info!("Recording scraped ordinance into the database"); let conn = conn.transaction().unwrap(); tracing::trace!("Transaction started"); From 16850c9d66d3dc6ac557dc37a4e14c45760b7fd7 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 11:33:13 -0700 Subject: [PATCH 29/44] refact: Minimum information with expect instead of unwrap --- crates/compass/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/compass/src/lib.rs b/crates/compass/src/lib.rs index 2a373f723..aeea7030d 100644 --- a/crates/compass/src/lib.rs +++ b/crates/compass/src/lib.rs @@ -118,7 +118,7 @@ pub fn load_ordinance + std::fmt::Debug>( let ordinance = runtime .block_on(scraper::ScrapedOrdinance::open(ordinance_path)) - .unwrap(); + .expect("Failed to open ordinance data source"); conn.commit().unwrap(); tracing::debug!("Transaction committed"); From 152eb1f6e4214de6f43256137674b4919fed1dde Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 11:42:16 -0700 Subject: [PATCH 30/44] feat: Recording logs in the DB --- crates/compass/src/scraper/log.rs | 11 +++++++++++ crates/compass/src/scraper/mod.rs | 1 + 2 files changed, 12 insertions(+) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index ee01bf40b..a26368ab0 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -284,6 +284,17 @@ impl RuntimeLogs { let records = Self::parse(&content)?; Ok(records) } + + pub(super) fn record(&self, conn: &duckdb::Transaction, commit_id: usize) -> Result<()> { + // debug!("Recording log: {:?}", self); + debug!("Recording runtime logs"); + + for record in &self.0 { + record.record(conn, commit_id)?; + } + + Ok(()) + } } #[cfg(test)] diff --git a/crates/compass/src/scraper/mod.rs b/crates/compass/src/scraper/mod.rs index 82a3c26a6..e8c37184c 100644 --- a/crates/compass/src/scraper/mod.rs +++ b/crates/compass/src/scraper/mod.rs @@ -150,6 +150,7 @@ impl ScrapedOrdinance { self.metadata.write(&conn, commit_id).unwrap(); self.usage().await.unwrap().write(&conn, commit_id).unwrap(); self.ordinance.write(&conn, commit_id).unwrap(); + self.logs.record(&conn, commit_id).unwrap(); tracing::trace!("Committing transaction"); conn.commit()?; From 9722898dee05bdb9f877999804cfd88dc2eea6da Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 12 Dec 2025 14:37:36 -0700 Subject: [PATCH 31/44] style: Left behind some imports from development --- crates/compass/src/scraper/log.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log.rs index a26368ab0..40c29a75c 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log.rs @@ -4,7 +4,6 @@ //! pos-processing and analysis. use chrono::NaiveDateTime; -use duckdb; use regex::Regex; use tracing::{debug, trace}; @@ -29,7 +28,6 @@ enum LogLevel { #[cfg(test)] mod test_loglevel { use super::*; - use serde_json; #[test] fn deserialize_trace() { From 6404901306280a212fb394f54066b0f432d29883 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 08:56:07 -0700 Subject: [PATCH 32/44] refact: Breaking log module in parts --- crates/compass/src/scraper/log/loglevel.rs | 133 ++++++++++++++++++ .../src/scraper/{log.rs => log/mod.rs} | 130 +---------------- 2 files changed, 138 insertions(+), 125 deletions(-) create mode 100644 crates/compass/src/scraper/log/loglevel.rs rename crates/compass/src/scraper/{log.rs => log/mod.rs} (64%) diff --git a/crates/compass/src/scraper/log/loglevel.rs b/crates/compass/src/scraper/log/loglevel.rs new file mode 100644 index 000000000..b33e9b0dd --- /dev/null +++ b/crates/compass/src/scraper/log/loglevel.rs @@ -0,0 +1,133 @@ +//! Runtime logs +//! +//! Parse and record the logs emitted by the runtime to support +//! pos-processing and analysis. + +use chrono::NaiveDateTime; +use regex::Regex; +use tracing::{debug, trace}; + +use crate::error::Result; + +#[derive(Debug, PartialEq, serde::Deserialize)] +pub(super) enum LogLevel { + #[serde(rename = "DEBUG_TO_FILE")] + DebugToFile, + #[serde(rename = "TRACE")] + Trace, + #[serde(rename = "DEBUG")] + Debug, + #[serde(rename = "INFO")] + Info, + #[serde(rename = "WARNING")] + Warning, + #[serde(rename = "ERROR")] + Error, +} + +#[cfg(test)] +mod test_loglevel { + use super::*; + + #[test] + fn deserialize_trace() { + let json = r#""TRACE""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Trace)); + } + + #[test] + fn deserialize_debug() { + let json = r#""DEBUG""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Debug)); + } + + #[test] + fn deserialize_info() { + let json = r#""INFO""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Info)); + } + + #[test] + fn deserialize_warning() { + let json = r#""WARNING""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Warning)); + } + + #[test] + fn deserialize_error() { + let json = r#""ERROR""#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Error)); + } + + #[test] + fn deserialize_invalid_variant() { + let json = r#""INVALID""#; + let result: std::result::Result = serde_json::from_str(json); + assert!(result.is_err()); + } + + #[test] + fn deserialize_lowercase_fails() { + let json = r#""trace""#; + let result: std::result::Result = serde_json::from_str(json); + assert!(result.is_err()); + } + + #[test] + fn deserialize_mixed_case_fails() { + let json = r#""Trace""#; + let result: std::result::Result = serde_json::from_str(json); + assert!(result.is_err()); + } + + #[test] + fn deserialize_in_struct() { + #[derive(serde::Deserialize)] + struct LogEntry { + level: LogLevel, + message: String, + } + + let json = r#"{"level": "ERROR", "message": "Something went wrong"}"#; + let entry: LogEntry = serde_json::from_str(json).unwrap(); + assert!(matches!(entry.level, LogLevel::Error)); + assert_eq!(entry.message, "Something went wrong"); + } + + #[test] + fn deserialize_array_of_levels() { + let json = r#"["TRACE", "INFO", "ERROR"]"#; + let levels: Vec = serde_json::from_str(json).unwrap(); + assert_eq!(levels.len(), 3); + assert!(matches!(levels[0], LogLevel::Trace)); + assert!(matches!(levels[1], LogLevel::Info)); + assert!(matches!(levels[2], LogLevel::Error)); + } + + #[test] + fn deserialize_with_whitespace() { + let json = r#" "INFO" "#; + let level: LogLevel = serde_json::from_str(json).unwrap(); + assert!(matches!(level, LogLevel::Info)); + } + + #[test] + fn error_message_contains_valid_options() { + let json = r#""FATAL""#; + let result: std::result::Result = serde_json::from_str(json); + + match result { + Err(e) => { + let error_msg = e.to_string(); + // The error message should mention valid variants + assert!(error_msg.contains("unknown variant")); + } + Ok(_) => panic!("Expected deserialization to fail"), + } + } +} diff --git a/crates/compass/src/scraper/log.rs b/crates/compass/src/scraper/log/mod.rs similarity index 64% rename from crates/compass/src/scraper/log.rs rename to crates/compass/src/scraper/log/mod.rs index 40c29a75c..a28b9b29a 100644 --- a/crates/compass/src/scraper/log.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -3,134 +3,16 @@ //! Parse and record the logs emitted by the runtime to support //! pos-processing and analysis. +mod loglevel; + +use std::sync::LazyLock; + use chrono::NaiveDateTime; use regex::Regex; use tracing::{debug, trace}; use crate::error::Result; - -#[derive(Debug, PartialEq, serde::Deserialize)] -enum LogLevel { - #[serde(rename = "DEBUG_TO_FILE")] - DebugToFile, - #[serde(rename = "TRACE")] - Trace, - #[serde(rename = "DEBUG")] - Debug, - #[serde(rename = "INFO")] - Info, - #[serde(rename = "WARNING")] - Warning, - #[serde(rename = "ERROR")] - Error, -} - -#[cfg(test)] -mod test_loglevel { - use super::*; - - #[test] - fn deserialize_trace() { - let json = r#""TRACE""#; - let level: LogLevel = serde_json::from_str(json).unwrap(); - assert!(matches!(level, LogLevel::Trace)); - } - - #[test] - fn deserialize_debug() { - let json = r#""DEBUG""#; - let level: LogLevel = serde_json::from_str(json).unwrap(); - assert!(matches!(level, LogLevel::Debug)); - } - - #[test] - fn deserialize_info() { - let json = r#""INFO""#; - let level: LogLevel = serde_json::from_str(json).unwrap(); - assert!(matches!(level, LogLevel::Info)); - } - - #[test] - fn deserialize_warning() { - let json = r#""WARNING""#; - let level: LogLevel = serde_json::from_str(json).unwrap(); - assert!(matches!(level, LogLevel::Warning)); - } - - #[test] - fn deserialize_error() { - let json = r#""ERROR""#; - let level: LogLevel = serde_json::from_str(json).unwrap(); - assert!(matches!(level, LogLevel::Error)); - } - - #[test] - fn deserialize_invalid_variant() { - let json = r#""INVALID""#; - let result: std::result::Result = serde_json::from_str(json); - assert!(result.is_err()); - } - - #[test] - fn deserialize_lowercase_fails() { - let json = r#""trace""#; - let result: std::result::Result = serde_json::from_str(json); - assert!(result.is_err()); - } - - #[test] - fn deserialize_mixed_case_fails() { - let json = r#""Trace""#; - let result: std::result::Result = serde_json::from_str(json); - assert!(result.is_err()); - } - - #[test] - fn deserialize_in_struct() { - #[derive(serde::Deserialize)] - struct LogEntry { - level: LogLevel, - message: String, - } - - let json = r#"{"level": "ERROR", "message": "Something went wrong"}"#; - let entry: LogEntry = serde_json::from_str(json).unwrap(); - assert!(matches!(entry.level, LogLevel::Error)); - assert_eq!(entry.message, "Something went wrong"); - } - - #[test] - fn deserialize_array_of_levels() { - let json = r#"["TRACE", "INFO", "ERROR"]"#; - let levels: Vec = serde_json::from_str(json).unwrap(); - assert_eq!(levels.len(), 3); - assert!(matches!(levels[0], LogLevel::Trace)); - assert!(matches!(levels[1], LogLevel::Info)); - assert!(matches!(levels[2], LogLevel::Error)); - } - - #[test] - fn deserialize_with_whitespace() { - let json = r#" "INFO" "#; - let level: LogLevel = serde_json::from_str(json).unwrap(); - assert!(matches!(level, LogLevel::Info)); - } - - #[test] - fn error_message_contains_valid_options() { - let json = r#""FATAL""#; - let result: std::result::Result = serde_json::from_str(json); - - match result { - Err(e) => { - let error_msg = e.to_string(); - // The error message should mention valid variants - assert!(error_msg.contains("unknown variant")); - } - Ok(_) => panic!("Expected deserialization to fail"), - } - } -} +use loglevel::LogLevel; #[derive(Debug)] pub(super) struct LogRecord { @@ -140,8 +22,6 @@ pub(super) struct LogRecord { message: String, } -use std::sync::LazyLock; - impl LogRecord { fn parse(line: &str) -> Result { // Regex pattern: [timestamp] LEVEL - subject: message From 2e9a7a211f061cc913be973402ff40c2e36b4ed3 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 09:04:27 -0700 Subject: [PATCH 33/44] clean: --- crates/compass/src/scraper/log/loglevel.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/crates/compass/src/scraper/log/loglevel.rs b/crates/compass/src/scraper/log/loglevel.rs index b33e9b0dd..b0e695ef3 100644 --- a/crates/compass/src/scraper/log/loglevel.rs +++ b/crates/compass/src/scraper/log/loglevel.rs @@ -3,12 +3,7 @@ //! Parse and record the logs emitted by the runtime to support //! pos-processing and analysis. -use chrono::NaiveDateTime; -use regex::Regex; -use tracing::{debug, trace}; - -use crate::error::Result; - +/// Log levels emitted by the (Python) runtime. #[derive(Debug, PartialEq, serde::Deserialize)] pub(super) enum LogLevel { #[serde(rename = "DEBUG_TO_FILE")] From 279067e0f168f97e379f4a1f8510cb12dc9c06ae Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 09:07:26 -0700 Subject: [PATCH 34/44] doc: --- crates/compass/src/scraper/log/mod.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index a28b9b29a..3408066ff 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -1,7 +1,14 @@ //! Runtime logs //! -//! Parse and record the logs emitted by the runtime to support +//! Parse and record the logs from the ordinance runtime to support //! pos-processing and analysis. +//! +//! The most verbose levels are ignored to minimize the impact on the +//! final database size. The outputs are archived, so any forensics +//! can be done by inspecting the raw log files if needed. The purpose +//! here is to support questions such as what is the distribution of +//! cost and runtime per jurisdictions? Which exceptions where captured +//! on each jurisdiction? mod loglevel; @@ -15,6 +22,14 @@ use crate::error::Result; use loglevel::LogLevel; #[derive(Debug)] +/// A single log record parsed from the runtime logs. +/// +/// Expect something like: +/// [2025-12-06 15:15:14,272] INFO - Task-1: Running COMPASS +/// or +/// [2025-12-06 15:15:14,572] INFO - Jefferson County, Colorado: Running COMPASS +/// +/// Note that the 'message' can have multiple lines. pub(super) struct LogRecord { timestamp: NaiveDateTime, level: LogLevel, @@ -23,6 +38,7 @@ pub(super) struct LogRecord { } impl LogRecord { + /// Parse a single log line into a LogRecord fn parse(line: &str) -> Result { // Regex pattern: [timestamp] LEVEL - subject: message static RE: LazyLock = @@ -99,6 +115,7 @@ mod tests { } #[derive(Debug)] +/// Collection of runtime log records pub(super) struct RuntimeLogs(Vec); impl RuntimeLogs { From 834e2b18fd162a292ea7b04f00b19aebe14776ed Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 13:05:25 -0700 Subject: [PATCH 35/44] typo: --- crates/compass/src/scraper/log/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index 3408066ff..89fbd2402 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -1,13 +1,13 @@ //! Runtime logs //! //! Parse and record the logs from the ordinance runtime to support -//! pos-processing and analysis. +//! post-processing and analysis. //! //! The most verbose levels are ignored to minimize the impact on the //! final database size. The outputs are archived, so any forensics //! can be done by inspecting the raw log files if needed. The purpose //! here is to support questions such as what is the distribution of -//! cost and runtime per jurisdictions? Which exceptions where captured +//! cost and runtime per jurisdictions? Which exceptions were captured //! on each jurisdiction? mod loglevel; From 46dbf2e7bdf57594d034f8fc5d4312ca0edbbc66 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 13:05:41 -0700 Subject: [PATCH 36/44] clean: --- crates/compass/src/scraper/log/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index 89fbd2402..02fa3a178 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -174,7 +174,6 @@ impl RuntimeLogs { pub(super) async fn open>(root: P) -> Result { let path = root.as_ref().join("logs").join("all.log"); - dbg!(&path); let content = tokio::fs::read_to_string(path).await?; let records = Self::parse(&content)?; Ok(records) From a88a1ef9d38bfd479596d1a99da5d1b8f14e6403 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 13:07:43 -0700 Subject: [PATCH 37/44] fix: Missing miliseconds --- crates/compass/src/scraper/log/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index 02fa3a178..5828fc104 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -80,7 +80,7 @@ impl LogRecord { "INSERT INTO logs (bookkeeper_lnk, timestamp, level, subject, message) VALUES (?, ?, ?, ?, ?)", duckdb::params![ bookkeeper_id, - self.timestamp.format("%Y-%m-%d %H:%M:%S").to_string(), + self.timestamp.format("%Y-%m-%d %H:%M:%S,%3f").to_string(), format!("{:?}", self.level), &self.subject, &self.message, From 652e1537d63d3870c51a263ec686ba5a793fdbf3 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 13:08:16 -0700 Subject: [PATCH 38/44] doc: --- crates/compass/src/scraper/log/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index 5828fc104..0b2e04865 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -29,7 +29,7 @@ use loglevel::LogLevel; /// or /// [2025-12-06 15:15:14,572] INFO - Jefferson County, Colorado: Running COMPASS /// -/// Note that the 'message' can have multiple lines. +/// The 'message' field only supports single-line log messages. pub(super) struct LogRecord { timestamp: NaiveDateTime, level: LogLevel, From 23500931610f51ec8ed9f61e527e19f59b53e4ba Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 13:10:10 -0700 Subject: [PATCH 39/44] typo: --- crates/compass/src/scraper/log/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index 0b2e04865..50259c672 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -158,9 +158,9 @@ impl RuntimeLogs { pub(super) fn init_db(conn: &duckdb::Transaction) -> Result<()> { conn.execute_batch( r" - CREATE SEQUENCE IF NOT EXISTS scrapper_log_seq START 1; + CREATE SEQUENCE IF NOT EXISTS scraper_log_seq START 1; CREATE TABLE IF NOT EXISTS logs ( - id INTEGER PRIMARY KEY DEFAULT NEXTVAL('scrapper_log_seq'), + id INTEGER PRIMARY KEY DEFAULT NEXTVAL('scraper_log_seq'), bookkeeper_lnk INTEGER REFERENCES bookkeeper(id) NOT NULL, timestamp TIMESTAMP, level VARCHAR, From 4b10335bafe3c7754339246a310dc0ad8f3976a2 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 13:10:26 -0700 Subject: [PATCH 40/44] clean: --- crates/compass/src/scraper/log/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index 50259c672..de5eac30e 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -210,7 +210,6 @@ pub(crate) mod sample { pub(crate) fn as_file>(path: P) -> Result { let mut file = std::fs::File::create(path).unwrap(); - dbg!(&file); writeln!(file, "{}", as_text_v1()).unwrap(); Ok(file) } From 88113116307ab0cc706f074c11aed8b36d1ff507 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sat, 13 Dec 2025 13:13:24 -0700 Subject: [PATCH 41/44] refact: Return one of our custom errros --- crates/compass/src/scraper/log/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index de5eac30e..11b839e98 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -59,9 +59,9 @@ impl LogRecord { let level_str = caps.get(2).unwrap().as_str(); // Parse the log level - let level = serde_json::from_str(&format!(r#""{}""#, level_str)) - .map_err(|e| format!("Invalid log level '{}': {}", level_str, e)) - .unwrap(); + let level = serde_json::from_str(&format!(r#""{}""#, level_str)).map_err(|e| { + crate::error::Error::Undefined(format!("Invalid log level '{}': {}", level_str, e)) + })?; let subject = caps.get(3).unwrap().as_str().to_string(); let message = caps.get(4).unwrap().as_str().to_string(); From 28c79b80af93ee9f687973737d66a6495b1201e1 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sun, 14 Dec 2025 08:43:00 -0700 Subject: [PATCH 42/44] fix: Bad format for microseconds --- crates/compass/src/scraper/log/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index 11b839e98..afeebab57 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -80,7 +80,7 @@ impl LogRecord { "INSERT INTO logs (bookkeeper_lnk, timestamp, level, subject, message) VALUES (?, ?, ?, ?, ?)", duckdb::params![ bookkeeper_id, - self.timestamp.format("%Y-%m-%d %H:%M:%S,%3f").to_string(), + self.timestamp.format("%Y-%m-%d %H:%M:%S.%3f").to_string(), format!("{:?}", self.level), &self.subject, &self.message, From 6d1563ba11a356d14cef41405c3bc672129ecd6a Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sun, 14 Dec 2025 08:43:34 -0700 Subject: [PATCH 43/44] log: Promoting initializing usage to debug level --- crates/compass/src/scraper/usage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/usage.rs b/crates/compass/src/scraper/usage.rs index 5032715b1..831f9d1d4 100644 --- a/crates/compass/src/scraper/usage.rs +++ b/crates/compass/src/scraper/usage.rs @@ -63,7 +63,7 @@ pub(super) struct UsageValues { impl Usage { /// Initialize the database for the Usage context pub(super) fn init_db(conn: &duckdb::Transaction) -> Result<()> { - tracing::trace!("Initializing database for Usage"); + tracing::debug!("Initializing database for Usage"); conn.execute_batch( r" CREATE SEQUENCE usage_sequence START 1; From 3a697a7106f1a6738c40a76aa048eb185bf05d62 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sun, 14 Dec 2025 08:44:05 -0700 Subject: [PATCH 44/44] log: Providing more information on parsing dates --- crates/compass/src/scraper/log/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/compass/src/scraper/log/mod.rs b/crates/compass/src/scraper/log/mod.rs index afeebab57..936cd617c 100644 --- a/crates/compass/src/scraper/log/mod.rs +++ b/crates/compass/src/scraper/log/mod.rs @@ -16,7 +16,7 @@ use std::sync::LazyLock; use chrono::NaiveDateTime; use regex::Regex; -use tracing::{debug, trace}; +use tracing::{debug, error, trace}; use crate::error::Result; use loglevel::LogLevel; @@ -49,8 +49,10 @@ impl LogRecord { })?; let timestamp_str = caps.get(1).unwrap().as_str().to_string(); + trace!("Parsing timestamp: {}", timestamp_str); let timestamp = NaiveDateTime::parse_from_str(×tamp_str, "%Y-%m-%d %H:%M:%S,%3f") .map_err(|e| { + error!("Failed to parse timestamp: {}, error: {}", timestamp_str, e); crate::error::Error::Undefined(format!( "Failed to parse timestamp '{}': {}", timestamp_str, e