diff --git a/monoscope.cabal b/monoscope.cabal index 6cdaca782..cb96aafe6 100644 --- a/monoscope.cabal +++ b/monoscope.cabal @@ -82,6 +82,7 @@ library Models.Apis.RequestDumps Models.Apis.Shapes Models.Apis.Slack + Models.Apis.LogPatterns Models.Projects.Dashboards Models.Projects.GitSync Models.Projects.ProjectApiKeys diff --git a/src/BackgroundJobs.hs b/src/BackgroundJobs.hs index 92984951f..74e55d426 100644 --- a/src/BackgroundJobs.hs +++ b/src/BackgroundJobs.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE TupleSections #-} + module BackgroundJobs (jobsWorkerInit, jobsRunner, processBackgroundJob, BgJobs (..), jobTypeName, runHourlyJob, generateOtelFacetsBatch, processFiveMinuteSpans, processOneMinuteErrors, throwParsePayload, checkTriggeredQueryMonitors, monitorStatus) where import Control.Lens ((.~)) @@ -18,7 +20,7 @@ import Data.Text.Display (display) import Data.Time (DayOfWeek (Monday), UTCTime (utctDay), ZonedTime, addUTCTime, dayOfWeek, formatTime, getZonedTime) import Data.Time.Clock (diffUTCTime) import Data.Time.Format (defaultTimeLocale) -import Data.Time.LocalTime (LocalTime (localDay), ZonedTime (zonedTimeToLocalTime), getCurrentTimeZone, utcToZonedTime) +import Data.Time.LocalTime (LocalTime (localDay), ZonedTime (zonedTimeToLocalTime), getCurrentTimeZone, utcToZonedTime, zonedTimeToUTC) import Data.UUID qualified as UUID import Data.UUID.V4 qualified as UUIDV4 import Data.Vector qualified as V @@ -42,6 +44,7 @@ import Models.Apis.Fields.Facets qualified as Facets import Models.Apis.Fields.Types qualified as Fields import Models.Apis.Issues qualified as Issues import Models.Apis.Issues.Enhancement qualified as Enhancement +import Models.Apis.LogPatterns qualified as LogPatterns import Models.Apis.Monitors qualified as Monitors import Models.Apis.Reports qualified as Reports import Models.Apis.RequestDumps (ATError (..)) @@ -64,7 +67,7 @@ import OpenTelemetry.Attributes qualified as OA import OpenTelemetry.Trace (TracerProvider) import Pages.Charts.Charts qualified as Charts import Pages.Reports qualified as RP -import Pkg.DeriveUtils (UUIDId (..)) +import Pkg.DeriveUtils (BaselineState (..), UUIDId (..)) import Pkg.Drain qualified as Drain import Pkg.GitHub qualified as GitHub import Pkg.Mail (NotificationAlerts (..), sendDiscordAlert, sendPostmarkEmail, sendSlackAlert, sendSlackMessage, sendWhatsAppAlert) @@ -80,7 +83,7 @@ import System.Logging qualified as Log import System.Tracing (SpanStatus (..), Tracing, addEvent, setStatus, withSpan) import System.Types (ATBackgroundCtx, DB, runBackground) import UnliftIO.Exception (bracket, catch, try) -import Utils (DBField) +import Utils (DBField, toXXHash) data BgJobs @@ -110,10 +113,13 @@ data BgJobs | SlackNotification Projects.ProjectId Text | EnhanceIssuesWithLLM Projects.ProjectId (V.Vector Issues.IssueId) | ProcessIssuesEnhancement UTCTime - | FifteenMinutesLogsPatternProcessing UTCTime Projects.ProjectId + | FiveMinutesLogsPatternProcessing UTCTime Projects.ProjectId | GitSyncFromRepo Projects.ProjectId | GitSyncPushDashboard Projects.ProjectId UUID.UUID -- projectId, dashboardId | GitSyncPushAllDashboards Projects.ProjectId -- Push all existing dashboards to repo + | LogPatternBaselineCalculation Projects.ProjectId + | LogPatternSpikeDetection Projects.ProjectId + | NewLogPatternDetected Projects.ProjectId Text deriving stock (Generic, Show) deriving anyclass (AE.FromJSON, AE.ToJSON) @@ -304,7 +310,7 @@ processBackgroundJob authCtx bgJob = -- Schedule 5-minute log pattern extraction forM_ [0 .. 
287] \interval -> do let scheduledTime = addUTCTime (fromIntegral $ interval * 300) currentTime - _ <- scheduleJob conn "background_jobs" (BackgroundJobs.FifteenMinutesLogsPatternProcessing scheduledTime p) scheduledTime + _ <- scheduleJob conn "background_jobs" (BackgroundJobs.FiveMinutesLogsPatternProcessing scheduledTime p) scheduledTime pass -- Schedule 5-minute span processing jobs (288 jobs per day = 24 hours * 12 per hour) forM_ [0 .. 287] \interval -> do @@ -348,11 +354,14 @@ processBackgroundJob authCtx bgJob = SlackNotification pid message -> sendSlackMessage pid message EnhanceIssuesWithLLM pid issueIds -> enhanceIssuesWithLLM pid issueIds ProcessIssuesEnhancement scheduledTime -> processIssuesEnhancement scheduledTime - FifteenMinutesLogsPatternProcessing scheduledTime pid -> logsPatternExtraction scheduledTime pid + FiveMinutesLogsPatternProcessing scheduledTime pid -> logsPatternExtraction scheduledTime pid GitSyncFromRepo pid -> gitSyncFromRepo pid GitSyncPushDashboard pid dashboardId -> gitSyncPushDashboard pid (UUIDId dashboardId) GitSyncPushAllDashboards pid -> gitSyncPushAllDashboards pid - QueryMonitorsCheck -> checkTriggeredQueryMonitors + QueryMonitorsCheck -> pass -- checkTriggeredQueryMonitors + LogPatternBaselineCalculation pid -> calculateLogPatternBaselines pid + LogPatternSpikeDetection pid -> detectLogPatternSpikes pid authCtx + NewLogPatternDetected pid patternHash -> processNewLogPattern pid patternHash authCtx -- | Run hourly scheduled tasks for all projects @@ -382,6 +391,12 @@ runHourlyJob scheduledTime hour = do let batchJob = BackgroundJobs.GenerateOtelFacetsBatch (V.fromList batch) scheduledTime createJob conn "background_jobs" batchJob + -- Schedule baseline calculation and spike detection for active projects + liftIO $ withResource ctx.jobsPool \conn -> + forM_ activeProjects \pid -> do + _ <- createJob conn "background_jobs" $ LogPatternBaselineCalculation pid + createJob conn "background_jobs" $ LogPatternSpikeDetection pid + -- Cleanup expired query cache entries deletedCount <- QueryCache.cleanupExpiredCache Relude.when (deletedCount > 0) $ Log.logInfo "Cleaned up expired query cache entries" ("deleted_count", AE.toJSON deletedCount) @@ -474,59 +489,71 @@ logsPatternExtraction :: UTCTime -> Projects.ProjectId -> ATBackgroundCtx () logsPatternExtraction scheduledTime pid = do ctx <- ask @Config.AuthContext Relude.when ctx.config.enableEventsTableUpdates $ do - tenMinutesAgo <- liftIO $ addUTCTime (-300) <$> Time.currentTime - paginate 0 tenMinutesAgo + fiveMinutesAgo <- liftIO $ addUTCTime (-300) <$> Time.currentTime + paginate 0 fiveMinutesAgo Log.logInfo "Completed logs pattern extraction for project" ("project_id", AE.toJSON pid.toText) where limitVal = 250 paginate :: Int -> UTCTime -> ATBackgroundCtx () paginate offset startTime = do - otelEvents <- PG.query [sql| SELECT kind, id::text, coalesce(body::text,''), coalesce(summary::text,'') FROM otel_logs_and_spans WHERE project_id = ? AND timestamp >= ? AND timestamp < ? AND (summary_pattern IS NULL OR log_pattern IS NULL) OFFSET ? LIMIT ?|] (pid, startTime, scheduledTime, offset, limitVal) + otelEvents :: [(Text, Text, Text, Text, Maybe Text, Maybe Text, Maybe Text)] <- PG.query [sql| SELECT kind, id::text, coalesce(body::text,''), coalesce(summary::text,''), context___trace_id, resource___service___name, level FROM otel_logs_and_spans WHERE project_id = ? AND timestamp >= ? AND timestamp < ? AND (summary_pattern IS NULL OR log_pattern IS NULL) OFFSET ? 
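+ -- paginates in fixed pages of 250 (limitVal); rows stay eligible until both log_pattern and summary_pattern are set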
LIMIT ?|] (pid, startTime, scheduledTime, offset, limitVal) unless (null otelEvents) do Log.logInfo "Fetching events for pattern extraction" ("offset", AE.toJSON offset, "count", AE.toJSON (length otelEvents)) - let (logs, summaries) = L.partition (\(k, _, _, _) -> k == "log") otelEvents - processPatterns "log" "log_pattern" (V.fromList [(i, body) | (_, i, body, _) <- logs]) pid scheduledTime startTime - processPatterns "summary" "summary_pattern" (V.fromList [(i, s) | (_, i, _, s) <- summaries]) pid scheduledTime startTime + let (logs, _) = L.partition (\(k, _, _, _, _, _, _) -> k == "log") otelEvents + processPatterns "log" "log_pattern" (V.fromList [(i, body, trId, serviceName, level) | (_, i, body, _, trId, serviceName, level) <- logs]) pid scheduledTime startTime + -- processPatterns "summary" "summary_pattern" (V.fromList [(i, s, trId, serviceName, level) | (_, i, _, s, trId, serviceName, level) <- summaries]) pid scheduledTime startTime Log.logInfo "Completed events pattern extraction for page" ("offset", AE.toJSON offset) Relude.when (length otelEvents == limitVal) $ paginate (offset + limitVal) startTime -- | Generic pattern extraction for logs or summaries -processPatterns :: Text -> Text -> V.Vector (Text, Text) -> Projects.ProjectId -> UTCTime -> UTCTime -> ATBackgroundCtx () +-- events: (id, content, traceId, serviceName, level) +processPatterns :: Text -> Text -> V.Vector (Text, Text, Maybe Text, Maybe Text, Maybe Text) -> Projects.ProjectId -> UTCTime -> UTCTime -> ATBackgroundCtx () processPatterns kind fieldName events pid scheduledTime since = do Relude.when (not $ V.null events) $ do - let qq = [text| select $fieldName from otel_logs_and_spans where project_id= ? AND timestamp >= now() - interval '1 hour' and $fieldName is not null GROUP BY $fieldName ORDER BY count(*) desc limit 20|] - existingPatterns <- coerce @[Only Text] @[Text] <$> PG.query (Query $ encodeUtf8 qq) pid - let known = V.fromList $ map ("",) existingPatterns - combined = known <> events + existingPatterns <- LogPatterns.getLogPatternTexts pid + let known = V.fromList $ map ("",False,,Nothing,Nothing,Nothing) existingPatterns + -- Include level in content for pattern matching so different levels create different patterns + combined = known <> ((\(logId, content, trId, serviceName, level) -> (logId, True, content, trId, serviceName, level)) <$> events) drainTree = processBatch (kind == "summary") combined scheduledTime Drain.emptyDrainTree newPatterns = Drain.getAllLogGroups drainTree -- Only log if patterns were extracted Relude.when (V.length newPatterns > 0) $ Log.logInfo ("Extracted " <> kind <> " patterns") ("count", AE.toJSON $ V.length newPatterns) - forM_ newPatterns \(patternTxt, ids) -> do - let q = [text|UPDATE otel_logs_and_spans SET $fieldName = ? WHERE project_id = ? AND timestamp > ? AND id::text = ANY(?)|] - unless (V.null ids) - $ void - $ PG.execute (Query $ encodeUtf8 q) (patternTxt, pid, since, V.filter (/= "") ids) - - --- | Process a batch of (id, content) pairs through Drain -processBatch :: Bool -> V.Vector (Text, Text) -> UTCTime -> Drain.DrainTree -> Drain.DrainTree + forM_ newPatterns \(sampleMsg, fieldPath, patternTxt, ids) -> do + unless (V.null ids) $ do + case kind of + "summary" -> void $ PG.execute [sql|UPDATE otel_logs_and_spans SET summary_pattern = ? WHERE project_id = ? AND timestamp > ? 
AND id::text = ANY(?)|] (patternTxt, pid, since, V.filter (/= "") ids) + _ -> void $ PG.execute [sql|UPDATE otel_logs_and_spans SET log_pattern = ? WHERE project_id = ? AND timestamp > ? AND id::text = ANY(?)|] (patternTxt, pid, since, V.filter (/= "") ids) + Relude.when (kind == "log" && not (T.null patternTxt) && patternTxt `notElem` existingPatterns) $ do + let (serviceName, logLevel, logTraceId) = case ids V.!? 0 of + Just logId | logId /= "" -> + case V.find (\(i, _, _, _, _) -> i == logId) events of + Just (_, _, trId, sName, lvl) -> (sName, lvl, trId) + Nothing -> (Nothing, Nothing, Nothing) + _ -> (Nothing, Nothing, Nothing) + let patternHash = toXXHash patternTxt + void $ LogPatterns.upsertLogPattern pid patternTxt patternHash serviceName logLevel logTraceId (Just sampleMsg) fieldPath + + +-- | Process a batch of (id, isSampleLog, content, traceId, serviceName, level) tuples through Drain +processBatch :: Bool -> V.Vector (Text, Bool, Text, Maybe Text, Maybe Text, Maybe Text) -> UTCTime -> Drain.DrainTree -> Drain.DrainTree processBatch isSummary batch now inTree = - V.foldl' (\tree (logId, content) -> processNewLog isSummary logId content now tree) inTree batch + V.foldl' (\tree (logId, isSampleLog, content, _, _, _) -> processNewLog isSummary logId isSampleLog content now tree) inTree batch -processNewLog :: Bool -> Text -> Text -> UTCTime -> Drain.DrainTree -> Drain.DrainTree -processNewLog isSummary logId content now tree = - let tokens = Drain.generateDrainTokens content +processNewLog :: Bool -> Text -> Bool -> Text -> UTCTime -> Drain.DrainTree -> Drain.DrainTree +processNewLog _isSummary logId isSampleLog content now tree = + let (tokens, fieldPath) = Drain.generateDrainTokens content in if V.null tokens then tree else let tokenCount = V.length tokens firstToken = V.head tokens - in Drain.updateTreeWithLog tree tokenCount firstToken tokens logId content now + in Drain.updateTreeWithLog tree tokenCount firstToken tokens logId isSampleLog content fieldPath now -- | Process errors from OpenTelemetry spans to detect runtime exceptions @@ -1637,3 +1664,93 @@ monitorStatus triggerLessThan warnThreshold alertThreshold alertRecovery warnRec where breached t = if triggerLessThan then t >= value else t <= value recovered r t = if triggerLessThan then value > fromMaybe t r else value < fromMaybe t r + + +-- ============================================================================ +-- Log Pattern Processing Jobs +-- ============================================================================ + +-- | Calculate baselines for log patterns +-- Uses hourly counts from otel_logs_and_spans over the last 7 days +calculateLogPatternBaselines :: Projects.ProjectId -> ATBackgroundCtx () +calculateLogPatternBaselines pid = do + Log.logInfo "Calculating log pattern baselines" pid + now <- Time.currentTime + -- Fetch up to 1000 patterns for this project (note: getLogPatterns does not filter out ignored patterns) + patterns <- LogPatterns.getLogPatterns pid 1000 0 + forM_ patterns \lp -> do + -- Get hourly stats from otel_logs_and_spans over last 7 days (168 hours) + statsM <- LogPatterns.getPatternStats pid lp.logPattern 168 + case statsM of + Nothing -> pass + Just stats -> do + let newSamples = stats.totalHours + newMean = stats.hourlyMedian + newStddev = stats.hourlyMADScaled + patternAgeDays = diffUTCTime now (zonedTimeToUTC lp.createdAt) / (24 * 60 * 60) + newState = case lp.baselineState of + BSEstablished -> BSEstablished + BSLearning -> + if newMean > 100 || patternAgeDays > 1 + then BSEstablished + else BSLearning + _ <- LogPatterns.updateBaseline pid lp.patternHash 
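+ -- Promotion rule: a pattern leaves BSLearning once its hourly median exceeds 100
+ -- events or the pattern is older than one day; BSEstablished is sticky. The stored
+ -- "mean"/"stddev" are really the median and scaled MAD from PatternStats, so the
+ -- spike detector below computes z = (current - median) / (MAD * 1.4826).
+ -- Illustrative numbers (not from this codebase): a baseline of median 120/hr with
+ -- scaled MAD 15 makes a 180/hr hour score z = (180 - 120) / 15 = 4.0, above the
+ -- 3.0 threshold used in detectLogPatternSpikes.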
newState newMean newStddev newSamples + pass + + Log.logInfo "Finished calculating log pattern baselines" (pid, length patterns) + + +-- | Detect log pattern volume spikes and create issues +-- Uses otel_logs_and_spans table for current rate calculation +detectLogPatternSpikes :: Projects.ProjectId -> Config.AuthContext -> ATBackgroundCtx () +detectLogPatternSpikes pid authCtx = do + Log.logInfo "Detecting log pattern spikes" pid + patternsWithRates <- LogPatterns.getPatternsWithCurrentRates pid + let spikeData = flip mapMaybe patternsWithRates \lpRate -> + case (lpRate.baselineState, lpRate.baselineMean, lpRate.baselineStddev) of + (BSEstablished, Just mean, Just stddev) + | stddev > 0 -> + let currentRate = fromIntegral lpRate.currentHourCount :: Double + zScore = (currentRate - mean) / stddev + isSpike = abs zScore > 3.0 && currentRate > mean + in if isSpike then Just (lpRate.patternId, lpRate.patternHash, currentRate, mean, stddev) else Nothing + _ -> Nothing + + let spikeIds = V.fromList $ map (\(pid', _, _, _, _) -> pid') spikeData + spikePatterns <- LogPatterns.getLogPatternsByIds spikeIds + let patternMap = HM.fromList $ V.toList $ V.map (\lp -> (lp.patternHash, lp)) spikePatterns + + forM_ spikeData \(patternId, patternHash, currentRate, mean, stddev) -> do + case HM.lookup patternHash patternMap of + Just lp -> do + Log.logInfo "Log pattern spike detected" (patternId, lp.logPattern, currentRate, mean) + issue <- liftIO $ Issues.createLogPatternRateChangeIssue pid lp currentRate mean stddev "spike" + Issues.insertIssue issue + liftIO $ withResource authCtx.jobsPool \conn -> + void $ createJob conn "background_jobs" $ EnhanceIssuesWithLLM pid (V.singleton issue.id) + Log.logInfo "Created issue for log pattern spike" (pid, lp.id, issue.id) + Nothing -> pass + Log.logInfo "Finished log pattern spike detection" pid + + +-- | Process a new log pattern and create an issue +processNewLogPattern :: Projects.ProjectId -> Text -> Config.AuthContext -> ATBackgroundCtx () +processNewLogPattern pid patternHash authCtx = do + Log.logInfo "Processing new log pattern" (pid, patternHash) + totalEvents <- do + res <- PG.query [sql| SELECT count(*) from otel_logs_and_spans WHERE project_id = ? 
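+ -- total event volume over the trailing 7 days; the guard below skips issue creation under 10,000 events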
AND timestamp >= now() - interval '7 days' |] (Only pid) + case res of + [Only cnt] -> return cnt + _ -> return 0 + if totalEvents < 10000 + then Log.logInfo "Skipping new log pattern issue creation due to low event volume" (pid, patternHash, totalEvents) + else do + patternM <- LogPatterns.getLogPatternByHash pid patternHash + whenJust patternM \lp -> do + Relude.when (lp.state == LogPatterns.LPSNew) $ do + issue <- liftIO $ Issues.createLogPatternIssue pid lp + Issues.insertIssue issue + liftIO $ withResource authCtx.jobsPool \conn -> + void $ createJob conn "background_jobs" $ EnhanceIssuesWithLLM pid (V.singleton issue.id) + Log.logInfo "Created issue for new log pattern" (pid, lp.id, issue.id) diff --git a/src/Models/Apis/Issues.hs b/src/Models/Apis/Issues.hs index 9b534e4a2..302482e0d 100644 --- a/src/Models/Apis/Issues.hs +++ b/src/Models/Apis/Issues.hs @@ -23,6 +23,8 @@ module Models.Apis.Issues ( APIChangeData (..), RuntimeExceptionData (..), QueryAlertData (..), + LogPatternRateChangeData (..), + LogPatternData (..), -- * Database Operations insertIssue, @@ -44,6 +46,8 @@ module Models.Apis.Issues ( issueIdText, parseIssueType, issueTypeToText, + createLogPatternIssue, + createLogPatternRateChangeIssue, -- * AI Conversations AIConversation (..), @@ -81,6 +85,7 @@ import Effectful (Eff) import Effectful.PostgreSQL qualified as PG import Models.Apis.Anomalies (PayloadChange) import Models.Apis.Anomalies qualified as Anomalies +import Models.Apis.LogPatterns qualified as LogPatterns import Models.Apis.RequestDumps qualified as RequestDumps import Models.Projects.Projects qualified as Projects import Models.Users.Users qualified as Users @@ -104,6 +109,8 @@ data IssueType = APIChange | RuntimeException | QueryAlert + | LogPattern + | LogPatternRateChange deriving stock (Eq, Generic, Show) deriving anyclass (NFData) deriving (AE.FromJSON, AE.ToJSON) via DAE.CustomJSON '[DAE.ConstructorTagModifier '[DAE.CamelToSnake]] IssueType @@ -117,6 +124,8 @@ issueTypeToText :: IssueType -> Text issueTypeToText APIChange = "api_change" -- Maps to anomaly_type 'shape' in DB issueTypeToText RuntimeException = "runtime_exception" issueTypeToText QueryAlert = "query_alert" +issueTypeToText LogPattern = "log_pattern" +issueTypeToText LogPatternRateChange = "log_pattern_rate_change" parseIssueType :: Text -> Maybe IssueType @@ -124,6 +133,8 @@ parseIssueType "api_change" = Just APIChange parseIssueType "shape" = Just APIChange -- Handle DB anomaly_type parseIssueType "runtime_exception" = Just RuntimeException parseIssueType "query_alert" = Just QueryAlert +parseIssueType "log_pattern" = Just LogPattern +parseIssueType "log_pattern_rate_change" = Just LogPatternRateChange parseIssueType _ = Nothing @@ -197,30 +208,23 @@ data Issue = Issue , updatedAt :: ZonedTime , projectId :: Projects.ProjectId , issueType :: IssueType - , endpointHash :: Text -- For API changes, empty for others - -- Status fields + , sourceType :: Text + , targetHash :: Text + , endpointHash :: Text , acknowledgedAt :: Maybe ZonedTime , acknowledgedBy :: Maybe Users.UserId , archivedAt :: Maybe ZonedTime - , -- Issue details - title :: Text - , service :: Text + , title :: Text + , service :: Maybe Text + , environment :: Maybe Text , critical :: Bool , severity :: Text -- "critical", "warning", "info" - -- Impact metrics - , affectedRequests :: Int - , affectedClients :: Int - , errorRate :: Maybe Double - , -- Actions - recommendedAction :: Text + , recommendedAction :: Text , migrationComplexity :: Text -- "low", "medium", 
"high", "n/a" - -- Data payload (polymorphic based on issueType) , issueData :: Aeson AE.Value - , -- Payload changes tracking (for API changes) - requestPayloads :: Aeson [PayloadChange] + , requestPayloads :: Aeson [PayloadChange] , responsePayloads :: Aeson [PayloadChange] - , -- LLM enhancement tracking - llmEnhancedAt :: Maybe UTCTime + , llmEnhancedAt :: Maybe UTCTime , llmEnhancementVersion :: Maybe Int } deriving stock (Generic, Show) @@ -235,18 +239,18 @@ instance Default Issue where , createdAt = error "createdAt must be set" , updatedAt = error "updatedAt must be set" , projectId = def - , issueType = def + , issueType = error "issueType must be set" + , sourceType = "" + , targetHash = "" , endpointHash = "" , acknowledgedAt = Nothing , acknowledgedBy = Nothing , archivedAt = Nothing , title = "" - , service = "" + , service = Nothing + , environment = Nothing , critical = False , severity = "info" - , affectedRequests = 0 - , affectedClients = 0 - , errorRate = Nothing , recommendedAction = "" , migrationComplexity = "low" , issueData = Aeson AE.Null @@ -269,7 +273,7 @@ data IssueL = IssueL , acknowledgedBy :: Maybe Users.UserId , archivedAt :: Maybe ZonedTime , title :: Text - , service :: Text + , service :: Maybe Text , critical :: Bool , severity :: Text -- Computed in query , affectedRequests :: Int -- Will be converted from affected_payloads in query @@ -291,6 +295,9 @@ data IssueL = IssueL deriving anyclass (FromRow, NFData) +-- | Insert a single issue +-- Note: ON CONFLICT only applies to api_change issues that are open (not acknowledged/archived) +-- Other issue types will fail on duplicate inserts as intended -- | Insert a single issue -- Note: ON CONFLICT only applies to api_change issues that are open (not acknowledged/archived) -- Other issue types will fail on duplicate inserts as intended @@ -300,24 +307,18 @@ insertIssue issue = void $ PG.execute q issue q = [sql| INSERT INTO apis.issues ( - id, created_at, updated_at, project_id, issue_type, endpoint_hash, + id, created_at, updated_at, project_id, issue_type, source_type, target_hash, endpoint_hash, acknowledged_at, acknowledged_by, archived_at, - title, service, critical, severity, - affected_requests, affected_clients, error_rate, + title, service, environment, critical, severity, recommended_action, migration_complexity, issue_data, request_payloads, response_payloads, llm_enhanced_at, llm_enhancement_version ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
-ON CONFLICT (project_id, endpoint_hash) - WHERE issue_type = 'api_change' - AND acknowledged_at IS NULL - AND archived_at IS NULL - AND endpoint_hash != '' +ON CONFLICT (project_id, target_hash, issue_type) + WHERE acknowledged_at IS NULL AND archived_at IS NULL DO UPDATE SET updated_at = EXCLUDED.updated_at, - affected_requests = issues.affected_requests + EXCLUDED.affected_requests, - affected_clients = GREATEST(issues.affected_clients, EXCLUDED.affected_clients), - issue_data = issues.issue_data || EXCLUDED.issue_data + issue_data = EXCLUDED.issue_data |] @@ -351,7 +352,7 @@ selectIssues pid _typeM isAcknowledged isArchived limit offset timeRangeM sortM q = [text| SELECT id, created_at, updated_at, project_id, issue_type::text, endpoint_hash, acknowledged_at, acknowledged_by, archived_at, title, service, critical, - CASE WHEN critical THEN 'critical' ELSE 'info' END, affected_requests, affected_clients, NULL::double precision, + CASE WHEN critical THEN 'critical' ELSE 'info' END, 0::int, 0::int, NULL::double precision, recommended_action, migration_complexity, issue_data, request_payloads, response_payloads, NULL::timestamp with time zone, NULL::int, 0::bigint, updated_at FROM apis.issues WHERE project_id = ? $timefilter $ackF $archF $orderBy LIMIT ? OFFSET ? |] @@ -389,7 +390,6 @@ updateIssueWithNewAnomaly issueId newData = void $ PG.execute q (Aeson newData, |] --- | Update issue enhancement updateIssueEnhancement :: DB es => IssueId -> Text -> Text -> Text -> Eff es () updateIssueEnhancement issueId title action complexity = void $ PG.execute q params where @@ -464,16 +464,16 @@ createAPIChangeIssue projectId endpointHash anomalies = do , projectId = projectId , issueType = APIChange , endpointHash = endpointHash + , sourceType = "" + , targetHash = "" + , environment = Nothing , acknowledgedAt = Nothing , acknowledgedBy = Nothing , archivedAt = Nothing , title = "API structure has changed" - , service = Anomalies.detectService Nothing firstAnomaly.endpointUrlPath + , service = Just $ Anomalies.detectService Nothing firstAnomaly.endpointUrlPath , critical = isCritical , severity = if isCritical then "critical" else "warning" - , affectedRequests = 0 - , affectedClients = 0 - , errorRate = Nothing , recommendedAction = "Review the API changes and update your integration accordingly." , migrationComplexity = if breakingChanges > 5 then "high" else if breakingChanges > 0 then "medium" else "low" , issueData = Aeson $ AE.toJSON apiChangeData @@ -510,16 +510,16 @@ createRuntimeExceptionIssue projectId atError = do , projectId = projectId , issueType = RuntimeException , endpointHash = fromMaybe "" atError.hash + , sourceType = "" + , targetHash = "" + , environment = Nothing , acknowledgedAt = Nothing , acknowledgedBy = Nothing , archivedAt = Nothing , title = atError.rootErrorType <> ": " <> T.take 100 atError.message - , service = fromMaybe "unknown-service" atError.serviceName + , service = atError.serviceName , critical = True , severity = "critical" - , affectedRequests = 1 - , affectedClients = 0 - , errorRate = Nothing , recommendedAction = "Investigate the error and implement a fix." 
, migrationComplexity = "n/a" , issueData = Aeson $ AE.toJSON exceptionData @@ -556,16 +556,16 @@ createQueryAlertIssue projectId queryId queryName queryExpr threshold actual thr , projectId = projectId , issueType = QueryAlert , endpointHash = "" + , sourceType = "" + , targetHash = "" + , environment = Nothing , acknowledgedAt = Nothing , acknowledgedBy = Nothing , archivedAt = Nothing , title = queryName <> " threshold " <> thresholdType <> " " <> show threshold - , service = "Monitoring" + , service = Just "Monitoring" , critical = True , severity = "warning" - , affectedRequests = 0 - , affectedClients = 0 - , errorRate = Nothing , recommendedAction = "Review the query results and take appropriate action." , migrationComplexity = "n/a" , issueData = Aeson $ AE.toJSON alertData @@ -663,3 +663,124 @@ slackThreadToConversationId cid ts = textToConversationId (cid <> ":" <> ts) discordThreadToConversationId :: Text -> UUIDId "conversation" discordThreadToConversationId = textToConversationId + + +-- | Create an issue for a log pattern rate change +createLogPatternRateChangeIssue :: Projects.ProjectId -> LogPatterns.LogPattern -> Double -> Double -> Double -> Text -> IO Issue +createLogPatternRateChangeIssue projectId lp currentRate baselineMean baselineStddev direction = do + now <- getCurrentTime + let zScoreVal = if baselineStddev > 0 then abs (currentRate - baselineMean) / baselineStddev else 0 + changePercentVal = if baselineMean > 0 then abs ((currentRate / baselineMean) - 1) * 100 else 0 + rateChangeData = + LogPatternRateChangeData + { patternHash = lp.patternHash + , logPattern = lp.logPattern + , sampleMessage = lp.sampleMessage + , logLevel = lp.logLevel + , serviceName = lp.serviceName + , currentRatePerHour = currentRate + , baselineMean = baselineMean + , baselineStddev = baselineStddev + , zScore = zScoreVal + , changePercent = changePercentVal + , changeDirection = direction + , detectedAt = now + } + severity = + if + | direction == "spike" && lp.logLevel == Just "error" -> "critical" + | direction == "spike" -> "warning" + | otherwise -> "info" + mkIssue projectId LogPatternRateChange lp.patternHash lp.patternHash lp.serviceName (direction == "spike" && lp.logLevel == Just "error") severity ("Log Pattern " <> T.toTitle direction <> ": " <> T.take 60 lp.logPattern <> " (" <> show (round changePercentVal :: Int) <> "%)") ("Log pattern volume " <> direction <> " detected. Current: " <> show (round currentRate :: Int) <> "/hr, Baseline: " <> show (round baselineMean :: Int) <> "/hr (" <> show (round zScoreVal :: Int) <> " std devs).") "n/a" rateChangeData + + +-- | Create an issue for a new log pattern +createLogPatternIssue :: Projects.ProjectId -> LogPatterns.LogPattern -> IO Issue +createLogPatternIssue projectId lp = do + now <- getCurrentTime + let logPatternData = + LogPatternData + { patternHash = lp.patternHash + , logPattern = lp.logPattern + , sampleMessage = lp.sampleMessage + , logLevel = lp.logLevel + , serviceName = lp.serviceName + , firstSeenAt = now + , occurrenceCount = fromIntegral lp.occurrenceCount + } + severity = case lp.logLevel of + Just "error" -> "critical" + Just "warning" -> "warning" + _ -> "info" + mkIssue projectId LogPattern lp.patternHash lp.patternHash lp.serviceName (lp.logLevel == Just "error") severity ("New Log Pattern: " <> T.take 100 lp.logPattern) "A new log pattern has been detected. Review to ensure it's expected behavior." 
"n/a" logPatternData + + +-- | Log Pattern issue data (new pattern detected) +data LogPatternData = LogPatternData + { patternHash :: Text + , logPattern :: Text + , sampleMessage :: Maybe Text + , logLevel :: Maybe Text + , serviceName :: Maybe Text + , firstSeenAt :: UTCTime + , occurrenceCount :: Int + } + deriving stock (Generic, Show) + deriving anyclass (NFData) + deriving (FromField, ToField) via Aeson LogPatternData + deriving (AE.FromJSON, AE.ToJSON) via DAE.CustomJSON '[DAE.OmitNothingFields, DAE.FieldLabelModifier '[DAE.CamelToSnake]] LogPatternData + + +-- | Log Pattern Rate Change issue data (volume spike/drop) +data LogPatternRateChangeData = LogPatternRateChangeData + { patternHash :: Text + , logPattern :: Text + , sampleMessage :: Maybe Text + , logLevel :: Maybe Text + , serviceName :: Maybe Text + , currentRatePerHour :: Double + , baselineMean :: Double + , baselineStddev :: Double + , zScore :: Double -- standard deviations from baseline + , changePercent :: Double -- percentage change from baseline + , changeDirection :: Text -- "spike" or "drop" + , detectedAt :: UTCTime + } + deriving stock (Generic, Show) + deriving anyclass (NFData) + deriving (FromField, ToField) via Aeson LogPatternRateChangeData + deriving (AE.FromJSON, AE.ToJSON) via DAE.CustomJSON '[DAE.OmitNothingFields, DAE.FieldLabelModifier '[DAE.CamelToSnake]] LogPatternRateChangeData + + +-- | Helper to create an issue with common defaults +mkIssue :: AE.ToJSON a => Projects.ProjectId -> IssueType -> Text -> Text -> Maybe Text -> Bool -> Text -> Text -> Text -> Text -> a -> IO Issue +mkIssue projectId issueType targetHash endpointHash service critical severity title recommendedAction migrationComplexity issueData = do + issueId <- UUIDId <$> UUID4.nextRandom + now <- getCurrentTime + zonedNow <- utcToLocalZonedTime now + pure + Issue + { id = issueId + , createdAt = zonedNow + , updatedAt = zonedNow + , projectId = projectId + , issueType = issueType + , sourceType = issueTypeToText issueType + , targetHash = targetHash + , endpointHash = endpointHash + , acknowledgedAt = Nothing + , acknowledgedBy = Nothing + , archivedAt = Nothing + , title = title + , service = service + , environment = Nothing + , critical = critical + , severity = severity + , recommendedAction = recommendedAction + , migrationComplexity = migrationComplexity + , issueData = Aeson $ AE.toJSON issueData + , requestPayloads = Aeson [] + , responsePayloads = Aeson [] + , llmEnhancedAt = Nothing + , llmEnhancementVersion = Nothing + } diff --git a/src/Models/Apis/Issues/Enhancement.hs b/src/Models/Apis/Issues/Enhancement.hs index f9f0e4605..ac3d8fa4a 100644 --- a/src/Models/Apis/Issues/Enhancement.hs +++ b/src/Models/Apis/Issues/Enhancement.hs @@ -14,6 +14,7 @@ import Data.Vector qualified as V import Database.PostgreSQL.Simple.Newtypes (Aeson (..), getAeson) import Effectful (Eff, (:>)) import Models.Apis.Issues qualified as Issues +import NeatInterpolation (text) import Pkg.AI qualified as AI import Relude hiding (id) import System.Config (AuthContext (..), EnvConfig (..)) @@ -97,67 +98,78 @@ buildTitlePrompt issue = let Aeson issueDataValue = issue.issueData in case AE.fromJSON issueDataValue of AE.Success (apiData :: Issues.APIChangeData) -> - "Generate a concise, descriptive title for this API change.\n" - <> "Endpoint: " - <> apiData.endpointMethod - <> " " - <> apiData.endpointPath - <> "\n" - <> "New fields: " - <> toText (show $ V.length apiData.newFields) - <> "\n" - <> "Deleted fields: " - <> toText (show $ V.length 
apiData.deletedFields) - <> "\n" - <> "Modified fields: " - <> toText (show $ V.length apiData.modifiedFields) - <> "\n" - <> "Service: " - <> issue.service + let endpoint = apiData.endpointMethod <> " " <> apiData.endpointPath + newFields = toText (show $ V.length apiData.newFields) + deletedFields = toText (show $ V.length apiData.deletedFields) + modifiedFields = toText (show $ V.length apiData.modifiedFields) + service = fromMaybe "unknown-service" issue.service + in [text| + Generate a concise, descriptive title for this API change. + Endpoint: $endpoint + New fields: $newFields + Deleted fields: $deletedFields + Modified fields: $modifiedFields + Service: $service + |] _ -> "Generate a concise title for this API change." Issues.RuntimeException -> let Aeson issueDataValue = issue.issueData in case AE.fromJSON issueDataValue of AE.Success (errorData :: Issues.RuntimeExceptionData) -> - "Generate a concise title for this runtime exception.\n" - <> "Error type: " - <> errorData.errorType - <> "\n" - <> "Error message: " - <> T.take 100 errorData.errorMessage - <> "\n" - <> "Service: " - <> issue.service + let errorType = errorData.errorType + errorMessage = T.take 100 errorData.errorMessage + service = fromMaybe "unknown-service" issue.service + in [text| + Generate a concise title for this runtime exception. + Error type: $errorType + Error message: $errorMessage + Service: $service + |] _ -> "Generate a concise title for this runtime exception." Issues.QueryAlert -> let Aeson issueDataValue = issue.issueData in case AE.fromJSON issueDataValue of AE.Success (alertData :: Issues.QueryAlertData) -> - "Generate a concise title for this query alert.\n" - <> "Query: " - <> alertData.queryName - <> "\n" - <> "Threshold: " - <> toText (show alertData.thresholdValue) - <> " (" - <> alertData.thresholdType - <> ")\n" - <> "Actual value: " - <> toText (show alertData.actualValue) + let queryName = alertData.queryName + thresholdValue = toText (show alertData.thresholdValue) + thresholdType = alertData.thresholdType + actualValue = toText (show alertData.actualValue) + in [text| + Generate a concise title for this query alert. + Query: $queryName + Threshold: $thresholdValue ($thresholdType) + Actual value: $actualValue + |] _ -> "Generate a concise title for this query alert." + Issues.LogPattern -> + let title = issue.title + service = fromMaybe "unknown-service" issue.service + in [text| + Generate a concise title for this log pattern issue. + Title: $title + Service: $service + |] + Issues.LogPatternRateChange -> + let title = issue.title + service = fromMaybe "unknown-service" issue.service + in [text| + Generate a concise title for this log pattern rate change. + Title: $title + Service: $service + |] systemPrompt = - unlines - [ "You are an API monitoring assistant. Generate clear, actionable titles for API issues." - , "Keep titles under 80 characters." - , "Focus on the impact and what changed." - , "Use present tense and active voice." - , "Examples:" - , "- 'New User Authentication Endpoint Added to Auth Service'" - , "- 'Breaking Change: 5 Required Fields Removed from Order Response'" - , "- 'Payment Service Schema Updated with 3 New Optional Fields'" - , "- 'Critical: NullPointerException in Cart Service Checkout Flow'" - ] + [text| + You are an API monitoring assistant. Generate clear, actionable titles for API issues. + Keep titles under 80 characters. + Focus on the impact and what changed. + Use present tense and active voice. 
+ Examples: + - 'New User Authentication Endpoint Added to Auth Service' + - 'Breaking Change: 5 Required Fields Removed from Order Response' + - 'Payment Service Schema Updated with 3 New Optional Fields' + - 'Critical: NullPointerException in Cart Service Checkout Flow' + |] in systemPrompt <> "\n\n" <> baseContext @@ -169,90 +181,94 @@ buildDescriptionPrompt issue = let Aeson issueDataValue = issue.issueData in case AE.fromJSON issueDataValue of AE.Success (apiData :: Issues.APIChangeData) -> - "Describe this API change and its impact.\n" - <> "Endpoint: " - <> apiData.endpointMethod - <> " " - <> apiData.endpointPath - <> "\n" - <> "New fields: " - <> toText (show $ V.toList apiData.newFields) - <> "\n" - <> "Deleted fields: " - <> toText (show $ V.toList apiData.deletedFields) - <> "\n" - <> "Modified fields: " - <> toText (show $ V.toList apiData.modifiedFields) - <> "\n" - <> "Total anomalies grouped: " - <> toText (show $ V.length apiData.anomalyHashes) - <> "\n" - <> "Service: " - <> issue.service + let endpoint = apiData.endpointMethod <> " " <> apiData.endpointPath + newFields = toText (show $ V.toList apiData.newFields) + deletedFields = toText (show $ V.toList apiData.deletedFields) + modifiedFields = toText (show $ V.toList apiData.modifiedFields) + totalAnomalies = toText (show $ V.length apiData.anomalyHashes) + service = fromMaybe "unknown-service" issue.service + in [text| + Describe this API change and its impact. + Endpoint: $endpoint + New fields: $newFields + Deleted fields: $deletedFields + Modified fields: $modifiedFields + Total anomalies grouped: $totalAnomalies + Service: $service + |] _ -> "Describe this API change and its implications." Issues.RuntimeException -> let Aeson issueDataValue = issue.issueData in case AE.fromJSON issueDataValue of AE.Success (errorData :: Issues.RuntimeExceptionData) -> - "Analyze this runtime exception and provide debugging guidance.\n" - <> "Error type: " - <> errorData.errorType - <> "\n" - <> "Error message: " - <> errorData.errorMessage - <> "\n" - <> "Stack trace: " - <> T.take 500 errorData.stackTrace - <> "\n" - <> "Request context: " - <> fromMaybe "Unknown" errorData.requestMethod - <> " " - <> fromMaybe "Unknown" errorData.requestPath - <> "\n" - <> "Occurrences: " - <> toText (show errorData.occurrenceCount) + let errorType = errorData.errorType + errorMessage = errorData.errorMessage + stackTrace = T.take 500 errorData.stackTrace + requestContext = fromMaybe "Unknown" errorData.requestMethod <> " " <> fromMaybe "Unknown" errorData.requestPath + occurrences = toText (show errorData.occurrenceCount) + in [text| + Analyze this runtime exception and provide debugging guidance. + Error type: $errorType + Error message: $errorMessage + Stack trace: $stackTrace + Request context: $requestContext + Occurrences: $occurrences + |] _ -> "Analyze this runtime exception." 
Issues.QueryAlert -> case AE.fromJSON (getAeson issue.issueData) of AE.Success (alertData :: Issues.QueryAlertData) -> - "Describe this query alert and recommended actions.\n" - <> "Query: " - <> alertData.queryName - <> "\n" - <> "Expression: " - <> alertData.queryExpression - <> "\n" - <> "Threshold: " - <> toText (show alertData.thresholdValue) - <> " (" - <> alertData.thresholdType - <> ")\n" - <> "Actual value: " - <> toText (show alertData.actualValue) - <> "\n" - <> "Triggered at: " - <> toText (show alertData.triggeredAt) + let queryName = alertData.queryName + queryExpression = alertData.queryExpression + thresholdValue = toText (show alertData.thresholdValue) + thresholdType = alertData.thresholdType + actualValue = toText (show alertData.actualValue) + triggeredAt = toText (show alertData.triggeredAt) + in [text| + Describe this query alert and recommended actions. + Query: $queryName + Expression: $queryExpression + Threshold: $thresholdValue ($thresholdType) + Actual value: $actualValue + Triggered at: $triggeredAt + |] _ -> "Describe this query alert." + Issues.LogPattern -> + let title = issue.title + service = fromMaybe "unknown-service" issue.service + in [text| + Describe this log pattern issue and its implications. + Title: $title + Service: $service + |] + Issues.LogPatternRateChange -> + let title = issue.title + service = fromMaybe "unknown-service" issue.service + in [text| + Describe this log pattern rate change and its implications. + Title: $title + Service: $service + |] systemPrompt = - unlines - [ "You are an API monitoring assistant. Generate detailed descriptions for API issues." - , "Structure your response in exactly 3 lines:" - , "Line 1: A clear description of what changed and why it matters (1-2 sentences)" - , "Line 2: Recommended action for developers (1 sentence)" - , "Line 3: Migration complexity: 'low', 'medium', or 'high'" - , "" - , "Guidelines:" - , "- Be specific about the impact on API consumers" - , "- Mention backward compatibility concerns" - , "- Provide actionable recommendations" - , "- Consider both immediate and long-term implications" - , "" - , "Example response:" - , "The /api/v1/orders endpoint schema has been updated with 3 new required fields (customerId, shippingAddress, paymentMethod), breaking backward compatibility for existing integrations." - , "Update your API clients to include the new required fields before the deprecation deadline, and implement proper validation for the new schema." - , "high" - ] + [text| + You are an API monitoring assistant. Generate detailed descriptions for API issues. + Structure your response in exactly 3 lines: + Line 1: A clear description of what changed and why it matters (1-2 sentences) + Line 2: Recommended action for developers (1 sentence) + Line 3: Migration complexity: 'low', 'medium', or 'high' + + Guidelines: + - Be specific about the impact on API consumers + - Mention backward compatibility concerns + - Provide actionable recommendations + - Consider both immediate and long-term implications + + Example response: + The /api/v1/orders endpoint schema has been updated with 3 new required fields (customerId, shippingAddress, paymentMethod), breaking backward compatibility for existing integrations. + Update your API clients to include the new required fields before the deprecation deadline, and implement proper validation for the new schema. 
+ high + |] in systemPrompt <> "\n\n" <> baseContext @@ -298,28 +314,32 @@ buildCriticalityPrompt issue = "Runtime exception: " <> issue.title Issues.QueryAlert -> "Query alert: " <> issue.title + Issues.LogPattern -> + "Log pattern: " <> issue.title + Issues.LogPatternRateChange -> + "Log pattern rate change: " <> issue.title systemPrompt = - unlines - [ "You are an API monitoring assistant. Analyze this API change and classify it." - , "Respond with exactly 3 lines:" - , "Line 1: 'critical' or 'safe' - Is this change critical?" - , "Line 2: Number of breaking changes (integer)" - , "Line 3: Number of incremental/safe changes (integer)" - , "" - , "Critical changes include:" - , "- Removing required fields" - , "- Changing field types incompatibly" - , "- Removing endpoints" - , "- Authentication/authorization changes" - , "- Runtime exceptions in core functionality" - , "" - , "Safe changes include:" - , "- Adding optional fields" - , "- New endpoints" - , "- Additional response data" - , "- Non-breaking format updates" - ] + [text| + You are an API monitoring assistant. Analyze this API change and classify it. + Respond with exactly 3 lines: + Line 1: 'critical' or 'safe' - Is this change critical? + Line 2: Number of breaking changes (integer) + Line 3: Number of incremental/safe changes (integer) + + Critical changes include: + - Removing required fields + - Changing field types incompatibly + - Removing endpoints + - Authentication/authorization changes + - Runtime exceptions in core functionality + + Safe changes include: + - Adding optional fields + - New endpoints + - Additional response data + - Non-breaking format updates + |] in systemPrompt <> "\n\n" <> context diff --git a/src/Models/Apis/LogPatterns.hs b/src/Models/Apis/LogPatterns.hs new file mode 100644 index 000000000..67cb0f8fd --- /dev/null +++ b/src/Models/Apis/LogPatterns.hs @@ -0,0 +1,328 @@ +module Models.Apis.LogPatterns ( + LogPattern (..), + LogPatternId, + LogPatternState (..), + getLogPatterns, + getLogPatternTexts, + getLogPatternByHash, + acknowledgeLogPatterns, + upsertLogPattern, + updateLogPatternStats, + updateBaseline, + -- Pattern stats from otel_logs_and_spans + PatternStats (..), + getPatternStats, + getCurrentHourPatternCount, + -- Pattern with current rate for spike detection + LogPatternWithRate (..), + getPatternsWithCurrentRates, + getLogPatternById, + getLogPatternsByIds, +) +where + +import Data.Aeson qualified as AE +import Data.List (lookup) +import Data.Text qualified as T +import Data.Time +import Data.UUID qualified as UUID +import Data.Vector qualified as V +import Database.PostgreSQL.Entity (_selectWhere) +import Database.PostgreSQL.Entity.Types (CamelToSnake, Entity, FieldModifiers, GenericEntity, PrimaryKey, Schema, TableName, field) +import Database.PostgreSQL.Simple (FromRow, Only (Only), ToRow) +import Database.PostgreSQL.Simple.FromField (FromField) +import Database.PostgreSQL.Simple.SqlQQ (sql) +import Database.PostgreSQL.Simple.ToField (ToField) +import Deriving.Aeson qualified as DAE +import Effectful (Eff) +import Effectful.PostgreSQL qualified as PG +import Models.Projects.Projects qualified as Projects +import Models.Users.Users qualified as Users +import Pkg.DBUtils (WrappedEnumSC (..)) +import Pkg.DeriveUtils (BaselineState (..)) +import Relude hiding (id) +import System.Types (DB) + + +newtype LogPatternId = LogPatternId {unLogPatternId :: Int64} + deriving stock (Generic, Show) + deriving newtype (AE.FromJSON, AE.ToJSON, Eq, FromField, NFData, Ord, ToField) + + +data 
LogPatternState + = LPSNew + | LPSAcknowledged + | LPSIgnored + deriving stock (Eq, Generic, Read, Show) + deriving anyclass (NFData) + deriving + (AE.FromJSON, AE.ToJSON) + via DAE.CustomJSON '[DAE.OmitNothingFields, DAE.FieldLabelModifier '[DAE.CamelToSnake]] LogPatternState + deriving (FromField, ToField) via WrappedEnumSC "LPS" LogPatternState + + +data LogPattern = LogPattern + { id :: LogPatternId + , projectId :: Projects.ProjectId + , createdAt :: ZonedTime + , updatedAt :: ZonedTime + , logPattern :: Text + , patternHash :: Text + , serviceName :: Maybe Text + , logLevel :: Maybe Text + , sampleMessage :: Maybe Text + , firstSeenAt :: ZonedTime + , lastSeenAt :: ZonedTime + , occurrenceCount :: Int64 + , state :: LogPatternState + , acknowledgedBy :: Maybe Users.UserId + , acknowledgedAt :: Maybe ZonedTime + , baselineState :: BaselineState + , baselineVolumeHourlyMean :: Maybe Double + , baselineVolumeHourlyStddev :: Maybe Double + , baselineSamples :: Int + , baselineUpdatedAt :: Maybe ZonedTime + , fieldPath :: Text + } + deriving stock (Generic, Show) + deriving anyclass (FromRow, NFData, ToRow) + deriving + (Entity) + via (GenericEntity '[Schema "apis", TableName "log_patterns", PrimaryKey "id", FieldModifiers '[CamelToSnake]] LogPattern) + deriving + (AE.FromJSON, AE.ToJSON) + via DAE.CustomJSON '[DAE.OmitNothingFields, DAE.FieldLabelModifier '[DAE.CamelToSnake]] LogPattern + + +-- | Get all log patterns for a project +getLogPatterns :: DB es => Projects.ProjectId -> Int -> Int -> Eff es [LogPattern] +getLogPatterns pid limit offset = PG.query q (pid, limit, offset) + where + q = + [sql| + SELECT id, project_id, created_at, updated_at, log_pattern, pattern_hash, + service_name, log_level, sample_message, first_seen_at, last_seen_at, + occurrence_count, state, acknowledged_by, acknowledged_at, + baseline_state, baseline_volume_hourly_mean, baseline_volume_hourly_stddev, + baseline_samples, baseline_updated_at, field_path + FROM apis.log_patterns + WHERE project_id = ? + ORDER BY last_seen_at DESC + LIMIT ? OFFSET ? + |] + + +getLogPatternTexts :: DB es => Projects.ProjectId -> Eff es [Text] +getLogPatternTexts pid = coerce @[Only Text] @[Text] <$> PG.query q (Only pid) + where + q = [sql| SELECT log_pattern FROM apis.log_patterns WHERE project_id = ? AND state != 'ignored' |] + + +-- | Get log pattern by hash +getLogPatternByHash :: DB es => Projects.ProjectId -> Text -> Eff es (Maybe LogPattern) +getLogPatternByHash pid hash = listToMaybe <$> PG.query (_selectWhere @LogPattern [[field| project_id |], [field| pattern_hash |]]) (pid, hash) + + +-- | Acknowledge log patterns +acknowledgeLogPatterns :: DB es => Users.UserId -> V.Vector Text -> Eff es Int64 +acknowledgeLogPatterns uid patternHashes + | V.null patternHashes = pure 0 + | otherwise = PG.execute q (uid, patternHashes) + where + q = + [sql| + UPDATE apis.log_patterns + SET state = 'acknowledged', acknowledged_by = ?, acknowledged_at = NOW() + WHERE pattern_hash = ANY(?) + |] + + +upsertLogPattern :: DB es => Projects.ProjectId -> Text -> Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Text -> Text -> Eff es Int64 +upsertLogPattern pid pat patHash serviceName logLevel trId sampleMsg fieldPath = + PG.execute q (pid, pat, patHash, serviceName, logLevel, trId, sampleMsg, fieldPath) + where + q = + [sql| + INSERT INTO apis.log_patterns (project_id, log_pattern, pattern_hash, service_name, log_level, trace_id, sample_message, field_path) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
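+ -- assumes a unique index on (project_id, log_level, field_path, pattern_hash) backing this ON CONFLICT target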
+ ON CONFLICT (project_id, log_level, field_path, pattern_hash) DO UPDATE SET + last_seen_at = NOW(), + occurrence_count = apis.log_patterns.occurrence_count + 1, + sample_message = COALESCE(EXCLUDED.sample_message, apis.log_patterns.sample_message), + service_name = COALESCE(EXCLUDED.service_name, apis.log_patterns.service_name), + trace_id = COALESCE(EXCLUDED.trace_id, apis.log_patterns.trace_id) + |] + + +-- | Update log pattern statistics (occurrence count, last seen) +updateLogPatternStats :: DB es => Projects.ProjectId -> Text -> Int64 -> Eff es Int64 +updateLogPatternStats pid patHash additionalCount = + PG.execute q (additionalCount, pid, patHash) + where + q = + [sql| + UPDATE apis.log_patterns + SET occurrence_count = occurrence_count + ?, + last_seen_at = NOW() + WHERE project_id = ? AND pattern_hash = ? + |] + + +updateBaseline :: DB es => Projects.ProjectId -> Text -> BaselineState -> Double -> Double -> Int -> Eff es Int64 +updateBaseline pid patHash bState hourlyMean hourlyStddev samples = + PG.execute q (bState, hourlyMean, hourlyStddev, samples, pid, patHash) + where + q = + [sql| + UPDATE apis.log_patterns + SET baseline_state = ?, + baseline_volume_hourly_mean = ?, + baseline_volume_hourly_stddev = ?, + baseline_samples = ?, + baseline_updated_at = NOW() + WHERE project_id = ? AND pattern_hash = ? + |] + + +-- | Stats for a log pattern from otel_logs_and_spans +-- Using median + MAD instead of mean + stddev for robustness against outliers/spikes +data PatternStats = PatternStats + { hourlyMedian :: Double -- median of hourly event counts + , hourlyMADScaled :: Double -- MAD * 1.4826, scaled to be comparable to a stddev + , totalHours :: Int + , totalEvents :: Int + } + deriving stock (Generic, Show) + deriving anyclass (FromRow) + + +-- | Get pattern stats from otel_logs_and_spans +-- Returns median and MAD (Median Absolute Deviation) for robust baseline calculation +getPatternStats :: DB es => Projects.ProjectId -> Text -> Int -> Eff es (Maybe PatternStats) +getPatternStats pid pattern' hoursBack = do + results <- PG.query q (pid, pattern', hoursBack) + return $ listToMaybe results + where + q = + [sql| + WITH hourly_counts AS ( + SELECT + date_trunc('hour', timestamp) AS hour_start, + COUNT(*) AS event_count + FROM otel_logs_and_spans + WHERE project_id = ?::text + AND log_pattern = ? + AND timestamp >= NOW() - INTERVAL '1 hour' * ? + GROUP BY date_trunc('hour', timestamp) + ), + median_calc AS ( + SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY event_count) AS median_val + FROM hourly_counts + ), + mad_calc AS ( + SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY ABS(hc.event_count - mc.median_val)) AS mad_val + FROM hourly_counts hc, median_calc mc + ) + SELECT + COALESCE(mc.median_val, 0)::FLOAT AS hourly_median, + COALESCE(mad.mad_val * 1.4826, 0)::FLOAT AS hourly_mad_scaled, + (SELECT COUNT(*)::INT FROM hourly_counts) AS total_hours, + (SELECT COALESCE(SUM(event_count), 0)::INT FROM hourly_counts) AS total_events + FROM median_calc mc, mad_calc mad + |] + + +-- | Get current hour count for a pattern +getCurrentHourPatternCount :: DB es => Projects.ProjectId -> Text -> Eff es Int +getCurrentHourPatternCount pid pattern' = do + results <- PG.query q (pid, pattern') + case results of + [Only count] -> return count + _ -> return 0 + where + q = + [sql| + SELECT COUNT(*)::INT + FROM otel_logs_and_spans + WHERE project_id = ?::text + AND log_pattern = ? 
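+ -- current calendar hour only; early in the hour this undercounts relative to an hourly baseline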
+ AND timestamp >= date_trunc('hour', NOW()) + |] + + +-- | Log pattern with current rate (for batch spike detection) +data LogPatternWithRate = LogPatternWithRate + { patternId :: LogPatternId + , projectId :: Projects.ProjectId + , logPattern :: Text + , patternHash :: Text + , baselineState :: BaselineState + , baselineMean :: Maybe Double + , baselineStddev :: Maybe Double + , currentHourCount :: Int + } + deriving stock (Generic, Show) + deriving anyclass (FromRow) + + +-- | Get all patterns with their current hour counts +getPatternsWithCurrentRates :: DB es => Projects.ProjectId -> Eff es [LogPatternWithRate] +getPatternsWithCurrentRates pid = do + patterns <- PG.query patternsQuery (Only pid) + counts :: [(Text, Int)] <- PG.query countsQuery (Only pid) + pure $ map (attachCount counts) patterns + where + patternsQuery = + [sql| + SELECT + id, + project_id, + log_pattern, + pattern_hash, + baseline_state, + baseline_volume_hourly_mean, + baseline_volume_hourly_stddev + FROM apis.log_patterns + WHERE project_id = ? + AND state != 'ignored' + AND baseline_state = 'established' + |] + + countsQuery = + [sql| + SELECT log_pattern, COUNT(*)::INT + FROM otel_logs_and_spans + WHERE project_id = ?::text + AND timestamp >= now() - interval '1 hour' + AND kind = 'log' + AND log_pattern IS NOT NULL + GROUP BY log_pattern + |] + + attachCount counts (patId, projId, logPat, patHash, blState, blMean, blStddev) = + LogPatternWithRate + { patternId = patId + , projectId = projId + , logPattern = logPat + , patternHash = patHash + , baselineState = blState + , baselineMean = blMean + , baselineStddev = blStddev + , currentHourCount = fromMaybe 0 $ lookup logPat counts + } + + +-- | Get a pattern by ID +getLogPatternById :: DB es => LogPatternId -> Eff es (Maybe LogPattern) +getLogPatternById lpid = listToMaybe <$> PG.query (_selectWhere @LogPattern [[field| id |]]) (Only lpid) + + +-- | Get multiple patterns by IDs in a single query (avoids N+1) +getLogPatternsByIds :: DB es => V.Vector LogPatternId -> Eff es (V.Vector LogPattern) +getLogPatternsByIds ids + | V.null ids = pure V.empty + | otherwise = V.fromList <$> PG.query [sql| SELECT * FROM apis.log_patterns WHERE id = ANY(?) |] (Only ids) diff --git a/src/Models/Apis/RequestDumps.hs b/src/Models/Apis/RequestDumps.hs index 14e503f94..6a76a0500 100644 --- a/src/Models/Apis/RequestDumps.hs +++ b/src/Models/Apis/RequestDumps.hs @@ -46,6 +46,7 @@ import Effectful.Log (Log) import Effectful.PostgreSQL qualified as PG import Effectful.Time qualified as Time import Models.Apis.Fields.Types () +import Models.Apis.LogPatterns qualified as LogPatterns import Models.Projects.Projects qualified as Projects import NeatInterpolation (text) import Pkg.DBUtils (WrappedEnumShow (..)) @@ -566,8 +567,21 @@ fetchLogPatterns pid queryAST dateRange sourceM targetM skip = do pidTxt = pid.toText whereCondition = fromMaybe [text|project_id=${pidTxt}|] queryComponents.whereClause target = fromMaybe "log_pattern" targetM - q = [text|select $target, count(*) as p_count from otel_logs_and_spans where project_id='${pidTxt}' and ${whereCondition} and $target is not null GROUP BY $target ORDER BY p_count desc offset ? limit 15;|] - PG.query (Query $ encodeUtf8 q) (Only skip) + if target == "log_pattern" + then do + activePatterns <- V.fromList <$> LogPatterns.getLogPatternTexts pid + let q = + [text| + SELECT log_pattern, count(*) as p_count + FROM otel_logs_and_spans + WHERE project_id='${pidTxt}' AND ${whereCondition} AND log_pattern = ANY(?) 
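+ -- ANY(?) binds the project's tracked, non-ignored patterns from apis.log_patterns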
+ GROUP BY log_pattern ORDER BY p_count DESC OFFSET ? LIMIT 15 + |] + PG.query (Query $ encodeUtf8 q) (activePatterns, skip) + else do + -- For other targets (e.g., summary_pattern), use the original query + let q = [text|select $target, count(*) as p_count from otel_logs_and_spans where project_id='${pidTxt}' and ${whereCondition} and $target is not null GROUP BY $target ORDER BY p_count desc offset ? limit 15;|] + PG.query (Query $ encodeUtf8 q) (Only skip) getLast24hTotalRequest :: DB es => Projects.ProjectId -> Eff es Int diff --git a/src/Pages/Anomalies.hs b/src/Pages/Anomalies.hs index dbddb7454..4e2027255 100644 --- a/src/Pages/Anomalies.hs +++ b/src/Pages/Anomalies.hs @@ -257,7 +257,7 @@ anomalyDetailPage :: Projects.ProjectId -> Issues.Issue -> Maybe Telemetry.Trace anomalyDetailPage pid issue tr otellogs errM now isFirst = do let spanRecs = V.catMaybes $ Telemetry.convertOtelLogsAndSpansToSpanRecord <$> otellogs issueId = UUID.toText issue.id.unUUIDId - div_ [class_ "pt-8 mx-auto px-4 w-full flex flex-col gap-4 h-full overflow-auto pb-32"] do + div_ [class_ "pt-8 mx-auto px-4 w-full flex flex-col gap-4 overflow-auto pb-32"] do -- Header div_ [class_ "flex flex-col gap-3"] do div_ [class_ "flex gap-2 flex-wrap items-center"] do @@ -268,37 +268,37 @@ anomalyDetailPage pid issue tr otellogs errM now isFirst = do _ -> pass h3_ [class_ "text-textStrong text-2xl font-semibold"] $ toHtml issue.title p_ [class_ "text-sm text-textWeak max-w-3xl"] $ toHtml issue.recommendedAction - - -- Metrics & Timeline Row (8-column grid: 4 stats + chart) - div_ [class_ "grid grid-cols-4 lg:grid-cols-8 gap-4"] do - -- Stats (1 column each) - statBox_ (Just pid) Nothing "Affected Requests" "" (show issue.affectedRequests) Nothing Nothing - statBox_ (Just pid) Nothing "Affected Clients" "" (show issue.affectedClients) Nothing Nothing - whenJust errM $ \err -> do - timeStatBox_ "First Seen" $ prettyTimeAuto now $ zonedTimeToUTC err.createdAt - timeStatBox_ "Last Seen" $ prettyTimeAuto now $ zonedTimeToUTC err.updatedAt - -- Timeline (4 columns) - div_ [class_ "col-span-4"] - $ Widget.widget_ - $ (def :: Widget.Widget) - { Widget.standalone = Just True - , Widget.id = Just $ issueId <> "-timeline" - , Widget.wType = Widget.WTTimeseries - , Widget.title = Just "Error trends" - , Widget.showTooltip = Just True - , Widget.xAxis = Just (def{Widget.showAxisLabel = Just True}) - , Widget.yAxis = Just (def{Widget.showOnlyMaxLabel = Just True}) - , Widget.query = Just "status_code == \"ERROR\" | summarize count(*) by bin_auto(timestamp), status_code" - , Widget._projectId = Just issue.projectId - , Widget.hideLegend = Just True - } + let widget title q = + div_ [class_ "col-span-4"] + $ Widget.widget_ + $ (def :: Widget.Widget) + { Widget.standalone = Just True + , Widget.id = Just $ issueId <> "-timeline" + , Widget.naked = Just True + , Widget.wType = Widget.WTTimeseries + , Widget.title = Just title + , Widget.showTooltip = Just True + , Widget.xAxis = Just (def{Widget.showAxisLabel = Just True}) + , Widget.yAxis = Just (def{Widget.showOnlyMaxLabel = Just True}) + , Widget.query = Just q + , Widget._projectId = Just issue.projectId + , Widget.hideLegend = Just True + } -- Two Column Layout - div_ [class_ "flex flex-col gap-4"] do - div_ [class_ "grid grid-cols-2 gap-4 w-full"] do - case issue.issueType of - Issues.RuntimeException -> do - case AE.fromJSON (getAeson issue.issueData) of - AE.Success (exceptionData :: Issues.RuntimeExceptionData) -> do + case issue.issueType of + Issues.RuntimeException -> do + 
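+ -- decode the polymorphic issue_data payload back into RuntimeExceptionData before rendering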
case AE.fromJSON (getAeson issue.issueData) of + AE.Success (exceptionData :: Issues.RuntimeExceptionData) -> do + div_ [class_ "grid grid-cols-4 lg:grid-cols-8 gap-4"] do + -- Stats (1 column each) + whenJust errM $ \err -> do + statBox_ (Just pid) Nothing "Affected Requests" "" "0" Nothing Nothing + statBox_ (Just pid) Nothing "Affected Clients" "" "0" Nothing Nothing + timeStatBox_ "First Seen" $ prettyTimeAuto now $ zonedTimeToUTC err.createdAt + timeStatBox_ "Last Seen" $ prettyTimeAuto now $ zonedTimeToUTC err.updatedAt + widget "Error trend" "status_code == \"ERROR\" | summarize count(*) by bin_auto(timestamp), status_code" + div_ [class_ "flex flex-col gap-4"] do + div_ [class_ "grid grid-cols-2 gap-4 w-full"] do div_ [class_ "surface-raised rounded-2xl overflow-hidden"] do div_ [class_ "px-4 py-3 border-b border-strokeWeak flex items-center justify-between"] do div_ [class_ "flex items-center gap-2"] do @@ -325,7 +325,6 @@ anomalyDetailPage pid issue tr otellogs errM now isFirst = do div_ [] do span_ [class_ "text-xs text-textWeak"] "First seen:" span_ [class_ "ml-2 text-xs"] $ toHtml $ compactTimeAgo $ toText $ prettyTimeAuto now (zonedTimeToUTC err.createdAt) - div_ [class_ "flex items-center gap-2"] do faSprite_ "calendar" "regular" "w-3 h-3" div_ [] do @@ -343,62 +342,108 @@ anomalyDetailPage pid issue tr otellogs errM now isFirst = do div_ [] do span_ [class_ "text-sm text-textWeak"] "Service:" span_ [class_ "ml-2 text-sm"] $ toHtml $ fromMaybe "Unknown service" err.errorData.serviceName - _ -> pass - Issues.QueryAlert -> do - case AE.fromJSON (getAeson issue.issueData) of - AE.Success (alertData :: Issues.QueryAlertData) -> do - div_ [class_ "mb-4"] do - span_ [class_ "text-sm text-textWeak mb-2 block font-medium"] "Query:" - div_ [class_ "bg-fillInformation-weak border border-strokeInformation-weak rounded-lg p-3 text-sm font-mono text-fillInformation-strong max-w-2xl overflow-x-auto"] - $ toHtml alertData.queryExpression - _ -> pass _ -> pass - - div_ [class_ "surface-raised rounded-2xl overflow-hidden", id_ "error-details-container"] do - div_ [class_ "px-4 border-b border-b-strokeWeak flex items-center justify-between"] do - div_ [class_ "flex items-center gap-2"] do - faSprite_ "magnifying-glass-chart" "regular" "w-4 h-4 text-iconNeutral" - h4_ [class_ "text-textStrong text-lg font-medium"] "Investigation" - div_ [class_ "flex items-center"] do - let aUrl = "/p/" <> pid.toText <> "/anomalies/" <> issueId <> "" - a_ [href_ $ aUrl <> "?first_occurrence=true", class_ $ (if isFirst then "text-textBrand font-medium" else "text-textWeak hover:text-textStrong") <> " text-xs py-3 px-3 cursor-pointer transition-colors", term "data-tippy-content" "Show first trace the error occured"] "First" - a_ [href_ aUrl, class_ $ (if isFirst then "text-textWeak hover:text-textStrong" else "text-textBrand font-medium") <> " text-xs py-3 px-3 cursor-pointer transition-colors", term "data-tippy-content" "Show recent trace the error occured"] "Recent" - span_ [class_ "mx-4 w-px h-4 bg-strokeWeak"] pass - button_ [class_ "text-xs py-3 px-3 cursor-pointer err-tab t-tab-active font-medium", onclick_ "navigatable(this, '#span-content', '#error-details-container', 't-tab-active', 'err')"] "Trace" - button_ [class_ "text-xs py-3 px-3 cursor-pointer err-tab font-medium", onclick_ "navigatable(this, '#log-content', '#error-details-container', 't-tab-active', 'err')"] "Logs" - button_ [class_ "text-xs py-3 px-3 cursor-pointer err-tab font-medium", onclick_ "navigatable(this, '#replay-content', 
'#error-details-container', 't-tab-active', 'err')"] "Replay" - div_ [class_ "p-2 w-full overflow-x-hidden"] do - div_ [class_ "flex w-full err-tab-content", id_ "span-content"] do - div_ [id_ "trace_container", class_ "grow-1 max-w-[80%] w-1/2 min-w-[20%] shrink-1"] do - whenJust tr $ \t -> - tracePage pid t spanRecs - unless (isJust tr) - $ div_ [class_ "flex flex-col items-center justify-center h-48"] do - faSprite_ "inbox-full" "regular" "w-6 h-6 text-textWeak" - span_ [class_ "mt-2 text-sm text-textWeak"] "No trace data available for this error." - div_ [class_ "transition-opacity duration-200 mx-1", id_ "resizer-details_width-wrapper"] $ resizer_ "log_details_container" "details_width" False - div_ [class_ "grow-0 relative shrink-0 overflow-y-auto overflow-x-hidden max-h-[500px] w-1/2 w-c-scroll overflow-x-hidden overflow-y-auto", id_ "log_details_container"] do - span_ [class_ "htmx-indicator query-indicator absolute loading left-1/2 -translate-x-1/2 loading-dots absoute z-10 top-10", id_ "details_indicator"] "" - let (spanId, createdAt) = case spanRecs V.!? 0 of - Just sr -> (sr.uSpanId, formatUTC sr.timestamp) - Nothing -> ("", "") - let url = "/p/" <> pid.toText <> "/log_explorer/" <> spanId <> "/" <> createdAt <> "/detailed" - div_ [hxGet_ url, hxTarget_ "#log_details_container", hxSwap_ "innerHtml", hxTrigger_ "intersect one", hxIndicator_ "#details_indicator", term "hx-sync" "this:replace"] pass - - div_ [id_ "log-content", class_ "hidden err-tab-content"] do - div_ [class_ "flex flex-col gap-4"] do - virtualTable pid (Just ("/p/" <> pid.toText <> "/log_explorer?json=true&query=" <> toUriStr ("kind==\"log\" AND context___trace_id==\"" <> fromMaybe "" (errM >>= (\x -> x.recentTraceId)) <> "\""))) - - div_ [id_ "replay-content", class_ "hidden err-tab-content"] do - let withSessionIds = V.catMaybes $ V.map (\sr -> (`lookupValueText` "id") =<< Map.lookup "session" =<< sr.attributes) spanRecs - unless (V.null withSessionIds) do - let sessionId = V.head withSessionIds - div_ [class_ "border border-r border-l w-max mx-auto"] - $ termRaw "session-replay" [id_ "sessionReplay", term "initialSession" sessionId, class_ "shrink-1 flex flex-col", term "projectId" pid.toText, term "containerId" "sessionPlayerWrapper"] ("" :: Text) - - when (V.null withSessionIds) - $ div_ [class_ "flex flex-col gap-4"] do - emptyState_ (Just "video") "No Replay Available" "No session replays associated with this trace" (Just "https://monoscope.tech/docs/sdks/Javascript/browser/") "Session Replay Guide" + Issues.QueryAlert -> do + case AE.fromJSON (getAeson issue.issueData) of + AE.Success (alertData :: Issues.QueryAlertData) -> do + div_ [class_ "mb-4"] do + span_ [class_ "text-sm text-textWeak mb-2 block font-medium"] "Query:" + div_ [class_ "bg-fillInformation-weak border border-strokeInformation-weak rounded-lg p-3 text-sm font-mono text-fillInformation-strong max-w-2xl overflow-x-auto"] + $ toHtml alertData.queryExpression + _ -> pass + Issues.APIChange -> + case AE.fromJSON (getAeson issue.issueData) of + AE.Success (d :: Issues.APIChangeData) -> do + div_ [class_ "flex items-center gap-3 mb-4 p-3 rounded-lg"] do + span_ [class_ $ "badge " <> methodFillColor d.endpointMethod] $ toHtml d.endpointMethod + span_ [class_ "monospace bg-fillWeaker px-2 py-1 rounded text-sm text-textStrong"] $ toHtml d.endpointPath + div_ [class_ "w-px h-4 bg-strokeWeak"] "" + span_ [class_ "flex items-center gap-1.5 text-sm text-textWeak"] do + faSprite_ "server" "regular" "h-3 w-3" + toHtml d.endpointHost + -- Stats and 
chart + div_ [class_ "grid grid-cols-4 lg:grid-cols-8 gap-4 mb-4"] do + timeStatBox_ "First Seen" $ prettyTimeAuto now (zonedTimeToUTC issue.createdAt) + widget "Request trend" $ "attributes.http.request.method==\"" <> d.endpointMethod <> "\" AND attributes.http.route==\"" <> d.endpointPath <> "\" | summarize count(*) by bin_auto(timestamp)" + _ -> pass + Issues.LogPattern -> + case AE.fromJSON (getAeson issue.issueData) of + AE.Success (d :: Issues.LogPatternData) -> do + div_ [class_ "grid grid-cols-2 lg:grid-cols-4 gap-4 mb-4"] do + statBox_ (Just pid) Nothing "Log Level" "" (fromMaybe "Unknown" d.logLevel) Nothing Nothing + statBox_ (Just pid) Nothing "Service" "" (fromMaybe "Unknown" d.serviceName) Nothing Nothing + statBox_ (Just pid) Nothing "Occurrences" "" (show d.occurrenceCount) Nothing Nothing + timeStatBox_ "First Seen" $ prettyTimeAuto now d.firstSeenAt + div_ [class_ "surface-raised rounded-2xl overflow-hidden mb-4"] do + div_ [class_ "px-4 py-3 border-b border-strokeWeak"] do + span_ [class_ "text-sm font-medium text-textStrong"] "Log Pattern" + div_ [class_ "p-4"] do + pre_ [class_ "text-sm text-textWeak font-mono whitespace-pre-wrap"] $ toHtml d.logPattern + whenJust d.sampleMessage $ \msg -> do + div_ [class_ "surface-raised rounded-2xl overflow-hidden mb-4"] do + div_ [class_ "px-4 py-3 border-b border-strokeWeak"] do + span_ [class_ "text-sm font-medium text-textStrong"] "Sample Message" + div_ [class_ "p-4"] do + pre_ [class_ "text-sm text-textWeak font-mono whitespace-pre-wrap"] $ toHtml msg + _ -> pass + Issues.LogPatternRateChange -> + case AE.fromJSON (getAeson issue.issueData) of + AE.Success (d :: Issues.LogPatternRateChangeData) -> do + div_ [class_ "grid grid-cols-2 lg:grid-cols-4 gap-4 mb-4"] do + statBox_ (Just pid) Nothing "Direction" "" d.changeDirection Nothing Nothing + statBox_ (Just pid) Nothing "Change" "" (show (round d.changePercent :: Int) <> "%") Nothing Nothing + statBox_ (Just pid) Nothing "Current Rate" "" (show (round d.currentRatePerHour :: Int) <> "/hr") Nothing Nothing + statBox_ (Just pid) Nothing "Baseline" "" (show (round d.baselineMean :: Int) <> "/hr") Nothing Nothing + div_ [class_ "surface-raised rounded-2xl overflow-hidden mb-4"] do + div_ [class_ "px-4 py-3 border-b border-strokeWeak"] do + span_ [class_ "text-sm font-medium text-textStrong"] "Log Pattern" + div_ [class_ "p-4"] do + pre_ [class_ "text-sm text-textWeak font-mono whitespace-pre-wrap"] $ toHtml d.logPattern + _ -> pass + div_ [class_ "surface-raised h-max rounded-2xl overflow-hidden", id_ "error-details-container"] do + div_ [class_ "px-4 border-b border-b-strokeWeak flex items-center justify-between"] do + div_ [class_ "flex items-center gap-2"] do + faSprite_ "magnifying-glass-chart" "regular" "w-4 h-4 text-iconNeutral" + h4_ [class_ "text-textStrong text-lg font-medium"] "Investigation" + div_ [class_ "flex items-center"] do + let aUrl = "/p/" <> pid.toText <> "/anomalies/" <> issueId + a_ [href_ $ aUrl <> "?first_occurrence=true", class_ $ (if isFirst then "text-textBrand font-medium" else "text-textWeak hover:text-textStrong") <> " text-xs py-3 px-3 cursor-pointer transition-colors", term "data-tippy-content" "Show first trace the error occurred"] "First" + a_ [href_ aUrl, class_ $ (if isFirst then "text-textWeak hover:text-textStrong" else "text-textBrand font-medium") <> " text-xs py-3 px-3 cursor-pointer transition-colors", term "data-tippy-content" "Show recent trace the error occurred"] "Recent" + span_ [class_ "mx-4 w-px h-4 bg-strokeWeak"] pass
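+ -- these tab buttons swap the err-tab-content panes below via navigatable()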
+ button_ [class_ "text-xs py-3 px-3 cursor-pointer err-tab t-tab-active font-medium", onclick_ "navigatable(this, '#span-content', '#error-details-container', 't-tab-active', 'err')"] "Trace" + button_ [class_ "text-xs py-3 px-3 cursor-pointer err-tab font-medium", onclick_ "navigatable(this, '#log-content', '#error-details-container', 't-tab-active', 'err')"] "Logs" + button_ [class_ "text-xs py-3 px-3 cursor-pointer err-tab font-medium", onclick_ "navigatable(this, '#replay-content', '#error-details-container', 't-tab-active', 'err')"] "Replay" + div_ [class_ "p-2 w-full overflow-x-hidden"] do + div_ [class_ "flex w-full err-tab-content", id_ "span-content"] do + div_ [id_ "trace_container", class_ "grow-1 max-w-[80%] w-1/2 min-w-[20%] shrink-1"] do + whenJust tr $ \t -> + tracePage pid t spanRecs + unless (isJust tr) + $ div_ [class_ "flex flex-col items-center justify-center h-48"] do + faSprite_ "inbox-full" "regular" "w-6 h-6 text-textWeak" + span_ [class_ "mt-2 text-sm text-textWeak"] "No trace data available for this error." + div_ [class_ "transition-opacity duration-200 mx-1", id_ "resizer-details_width-wrapper"] $ resizer_ "log_details_container" "details_width" False + div_ [class_ "grow-0 relative shrink-0 overflow-y-auto overflow-x-hidden max-h-[500px] w-1/2 w-c-scroll", id_ "log_details_container"] do + span_ [class_ "htmx-indicator query-indicator absolute loading left-1/2 -translate-x-1/2 loading-dots z-10 top-10", id_ "details_indicator"] "" + let (spanId, createdAt) = case spanRecs V.!? 0 of + Just sr -> (sr.uSpanId, formatUTC sr.timestamp) + Nothing -> ("", "") + let url = "/p/" <> pid.toText <> "/log_explorer/" <> spanId <> "/" <> createdAt <> "/detailed" + div_ [hxGet_ url, hxTarget_ "#log_details_container", hxSwap_ "innerHtml", hxTrigger_ "intersect one", hxIndicator_ "#details_indicator", term "hx-sync" "this:replace"] pass + + div_ [id_ "log-content", class_ "hidden err-tab-content"] do + div_ [class_ "flex flex-col gap-4"] do + virtualTable pid (Just ("/p/" <> pid.toText <> "/log_explorer?json=true&query=" <> toUriStr ("kind==\"log\" AND context___trace_id==\"" <> fromMaybe "" (errM >>= (\x -> x.recentTraceId)) <> "\""))) + + div_ [id_ "replay-content", class_ "hidden err-tab-content"] do + let withSessionIds = V.catMaybes $ V.map (\sr -> (`lookupValueText` "id") =<< Map.lookup "session" =<< sr.attributes) spanRecs + unless (V.null withSessionIds) do + let sessionId = V.head withSessionIds + div_ [class_ "border border-r border-l w-max mx-auto"] + $ termRaw "session-replay" [id_ "sessionReplay", term "initialSession" sessionId, class_ "shrink-1 flex flex-col", term "projectId" pid.toText, term "containerId" "sessionPlayerWrapper"] ("" :: Text) + when (V.null withSessionIds) + $ div_ [class_ "flex flex-col gap-4"] do + emptyState_ (Just "video") "No Replay Available" "No session replays associated with this trace" (Just "https://monoscope.tech/docs/sdks/Javascript/browser/") "Session Replay Guide" -- AI Chat section (inline with page content) anomalyAIChat_ pid issue.id @@ -429,9 +474,9 @@ buildAIContext issue errM trDataM spans = , Just $ "- **Title**: " <> issue.title , Just $ "- **Type**: " <> show issue.issueType , Just $ "- **Severity**: " <> issue.severity - , Just $ "- **Service**: " <> issue.service - , Just $ "- **Affected Requests**: " <> show issue.affectedRequests - , Just $ "- **Affected Clients**: " <> show issue.affectedClients + , Just $ "- **Service**: " <> fromMaybe "unknown-service" issue.service + , Just
$ "- **Affected Requests**: " <> "0" + , Just $ "- **Affected Clients**: " <> "0" , Just $ "- **Recommended Action**: " <> issue.recommendedAction , errM >>= \err -> Just @@ -935,7 +980,7 @@ renderIssueMainCol pid (IssueVM hideByDefault isWidget currTime timeFilter issue -- Service badge span_ [class_ "flex items-center gap-1"] do div_ [class_ "w-3 h-3 bg-fillYellow rounded-sm"] "" - span_ [class_ "text-textStrong"] $ toHtml issue.service + span_ [class_ "text-textStrong"] $ toHtml $ fromMaybe "unknown-service" issue.service -- Time since span_ [class_ "text-textWeak"] $ toHtml timeSinceString @@ -982,6 +1027,22 @@ renderIssueMainCol pid (IssueVM hideByDefault isWidget currTime timeFilter issue span_ [class_ "text-sm text-textWeak mb-2 block font-medium"] "Query:" div_ [class_ "bg-fillInformation-weak border border-strokeInformation-weak rounded-lg p-3 text-sm monospace text-fillInformation-strong max-w-2xl overflow-x-auto"] $ toHtml alertData.queryExpression _ -> pass + Issues.LogPattern -> case AE.fromJSON (getAeson issue.issueData) of + AE.Success (d :: Issues.LogPatternData) -> + div_ [class_ "border border-strokeWeak rounded-lg mb-4"] do + label_ [class_ "text-sm text-textWeak font-semibold rounded-lg p-2 flex gap-2 items-center cursor-pointer"] do + toHtml $ fromMaybe "LOG" d.logLevel <> " pattern (" <> show d.occurrenceCount <> " occurrences)" + div_ [class_ "bg-fillWeak p-4 overflow-x-scroll group-has-[.lp-input:checked]/lp:block text-sm monospace text-textStrong"] $ pre_ [class_ "whitespace-pre-wrap"] $ toHtml d.logPattern + _ -> pass + Issues.LogPatternRateChange -> case AE.fromJSON (getAeson issue.issueData) of + AE.Success (d :: Issues.LogPatternRateChangeData) -> + div_ [class_ "border border-strokeWeak rounded-lg group/lpr mb-4"] do + label_ [class_ "text-sm text-textWeak font-semibold rounded-lg p-2 flex gap-2 items-center cursor-pointer"] do + faSprite_ "chevron-right" "regular" "h-3 w-3 group-has-[.lpr-input:checked]/lpr:rotate-90" + toHtml $ "Rate " <> d.changeDirection <> " (" <> show (round d.changePercent :: Int) <> "%)" + input_ [class_ "lpr-input w-0 h-0 opacity-0", type_ "checkbox"] + div_ [class_ "bg-fillWeak p-4 overflow-x-scroll hidden group-has-[.lpr-input:checked]/lpr:block text-sm monospace text-textStrong"] $ pre_ [class_ "whitespace-pre-wrap"] $ toHtml d.logPattern + _ -> pass _ -> pass -- Recommended action @@ -1243,6 +1304,8 @@ issueTypeBadge issueType critical = badge cls icon txt (cls, icon, txt) = case issueType of Issues.RuntimeException -> ("bg-fillError-strong", "triangle-alert", "ERROR") Issues.QueryAlert -> ("bg-fillWarning-strong", "zap", "ALERT") + Issues.LogPattern -> ("bg-fillInformation-strong", "file-text", "LOG PATTERN") + Issues.LogPatternRateChange -> ("bg-fillWarning-strong", "trending-up", "RATE CHANGE") Issues.APIChange | critical -> ("bg-fillError-strong", "exclamation-triangle", "BREAKING") | otherwise -> ("bg-fillInformation-strong", "info", "Incremental") diff --git a/src/Pkg/Components/LogQueryBox.hs b/src/Pkg/Components/LogQueryBox.hs index 0d0c0395e..7ed5be52e 100644 --- a/src/Pkg/Components/LogQueryBox.hs +++ b/src/Pkg/Components/LogQueryBox.hs @@ -99,7 +99,7 @@ logQueryBox_ config = do div_ [class_ "w-full gap-2 items-center px-2 hidden group-has-[.ai-search:checked]/fltr:flex"] do faSprite_ "sparkles" "regular" "h-4 w-4 inline-block text-iconBrand" input_ - [ class_ "border-0 w-full flex-1 p-1 outline-none peer" + [ class_ "border-0 w-full flex-1 p-1 no-focus-ring peer" , placeholder_ "Ask. Eg: Logs with errors. 
Hit Enter to submit" , id_ "ai-search-input" , required_ "required" diff --git a/src/Pkg/Drain.hs b/src/Pkg/Drain.hs index f92926b0f..b3337af54 100644 --- a/src/Pkg/Drain.hs +++ b/src/Pkg/Drain.hs @@ -5,19 +5,27 @@ module Pkg.Drain ( updateTreeWithLog, generateDrainTokens, getAllLogGroups, + -- Exported for testing + tokenize, + extractQuoted, + extractBracketedContent, ) where -import Data.Char (isSpace) +import Data.Aeson qualified as AE import Data.Text qualified as T import Data.Time.Clock (UTCTime) import Data.Vector qualified as V +import Models.Telemetry.Telemetry (SeverityLevel (..)) import Relude -import Utils (replaceAllFormats) +import RequestMessages (valueToFields) +import Utils (extractMessageAndTargetKeyFromLog, extractMessageFromLog, messageKeys, replaceAllFormats) data LogGroup = LogGroup { template :: V.Vector Text , templateStr :: Text + , exampleLog :: Text + , fieldPath :: Text , logIds :: V.Vector Text , frequency :: Int , firstSeen :: UTCTime @@ -29,6 +37,7 @@ data LogGroup = LogGroup data DrainLevelTwo = DrainLevelTwo { firstToken :: Text -- The first token used for grouping + , fieldPath :: Text , logGroups :: V.Vector LogGroup -- Leaf clusters } deriving (Generic, Show) @@ -82,12 +91,14 @@ emptyDrainTree = } -createLogGroup :: V.Vector Text -> Text -> Text -> UTCTime -> LogGroup -createLogGroup templateTokens templateString logId now = +createLogGroup :: V.Vector Text -> Text -> Text -> Text -> UTCTime -> LogGroup +createLogGroup templateTokens templateString logId field now = LogGroup { template = templateTokens , templateStr = templateString , logIds = V.singleton logId + , exampleLog = templateString + , fieldPath = field , frequency = 1 , firstSeen = now , lastSeen = now @@ -105,9 +116,9 @@ calculateSimilarity tokens1 tokens2 in fromIntegral matches / fromIntegral total -updateTreeWithLog :: DrainTree -> Int -> Text -> V.Vector Text -> Text -> Text -> UTCTime -> DrainTree -updateTreeWithLog tree tokenCount firstToken tokensVec logId logContent now = - let (updatedChildren, wasUpdated) = updateOrCreateLevelOne (children tree) tokenCount firstToken tokensVec logId logContent now (config tree) +updateTreeWithLog :: DrainTree -> Int -> Text -> V.Vector Text -> Text -> Bool -> Text -> Text -> UTCTime -> DrainTree +updateTreeWithLog tree tokenCount firstToken tokensVec logId isSampleLog logContent field now = + let (updatedChildren, wasUpdated) = updateOrCreateLevelOne (children tree) tokenCount firstToken tokensVec logId isSampleLog logContent field now (config tree) newTotalLogs = totalLogs tree + 1 newTotalPatterns = if wasUpdated then totalPatterns tree else totalPatterns tree + 1 in tree @@ -117,35 +128,35 @@ updateTreeWithLog tree tokenCount firstToken tokensVec logId logContent now = } -updateOrCreateLevelOne :: V.Vector DrainLevelOne -> Int -> Text -> V.Vector Text -> Text -> Text -> UTCTime -> DrainConfig -> (V.Vector DrainLevelOne, Bool) -updateOrCreateLevelOne levelOnes targetCount firstToken tokensVec logId logContent now config = - case V.findIndex (\level -> tokenCount level == targetCount) levelOnes of +updateOrCreateLevelOne :: V.Vector DrainLevelOne -> Int -> Text -> V.Vector Text -> Text -> Bool -> Text -> Text -> UTCTime -> DrainConfig -> (V.Vector DrainLevelOne, Bool) +updateOrCreateLevelOne levelOnes targetCount firstToken tokensVec logId isSampleLog logContent field now config = + case V.findIndex (\level -> level.tokenCount == targetCount) levelOnes of Just index -> let existingLevel = levelOnes V.! 
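-- level-one nodes are keyed by token count; level-two nodes by (firstToken, fieldPath)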
index - (updatedChildren, wasUpdated) = updateOrCreateLevelTwo (nodes existingLevel) firstToken tokensVec logId logContent now config + (updatedChildren, wasUpdated) = updateOrCreateLevelTwo (nodes existingLevel) firstToken tokensVec logId isSampleLog logContent field now config updatedLevel = existingLevel{nodes = updatedChildren} updatedLevelOnes = levelOnes V.// [(index, updatedLevel)] in (updatedLevelOnes, wasUpdated) Nothing -> - let newLogGroup = createLogGroup tokensVec (unwords $ V.toList tokensVec) logId now - newLevelTwo = DrainLevelTwo{firstToken = firstToken, logGroups = V.singleton newLogGroup} + let newLogGroup = createLogGroup tokensVec (unwords $ V.toList tokensVec) logId field now + newLevelTwo = DrainLevelTwo{firstToken = firstToken, fieldPath = field, logGroups = V.singleton newLogGroup} newLevelOne = DrainLevelOne{tokenCount = targetCount, nodes = V.singleton newLevelTwo} updatedLevelOnes = V.cons newLevelOne levelOnes in (updatedLevelOnes, False) -updateOrCreateLevelTwo :: V.Vector DrainLevelTwo -> Text -> V.Vector Text -> Text -> Text -> UTCTime -> DrainConfig -> (V.Vector DrainLevelTwo, Bool) -updateOrCreateLevelTwo levelTwos targetToken tokensVec logId logContent now config = - case V.findIndex (\level -> firstToken level == targetToken) levelTwos of +updateOrCreateLevelTwo :: V.Vector DrainLevelTwo -> Text -> V.Vector Text -> Text -> Bool -> Text -> Text -> UTCTime -> DrainConfig -> (V.Vector DrainLevelTwo, Bool) +updateOrCreateLevelTwo levelTwos targetToken tokensVec logId isSampleLog logContent field now config = + case V.findIndex (\level -> level.firstToken == targetToken && level.fieldPath == field) levelTwos of Just index -> let existingLevel = levelTwos V.! index - (updatedLogGroups, wasUpdated) = updateOrCreateLogGroup (logGroups existingLevel) tokensVec logId logContent now config + (updatedLogGroups, wasUpdated) = updateOrCreateLogGroup (logGroups existingLevel) tokensVec logId isSampleLog logContent field now config updatedLevel = existingLevel{logGroups = updatedLogGroups} updatedLevelTwos = levelTwos V.// [(index, updatedLevel)] in (updatedLevelTwos, wasUpdated) Nothing -> - let newLogGroup = createLogGroup tokensVec (unwords $ V.toList tokensVec) logId now - newLevelTwo = DrainLevelTwo{firstToken = targetToken, logGroups = V.singleton newLogGroup} + let newLogGroup = createLogGroup tokensVec (unwords $ V.toList tokensVec) logId field now + newLevelTwo = DrainLevelTwo{firstToken = targetToken, fieldPath = field, logGroups = V.singleton newLogGroup} updatedLevelTwos = V.cons newLevelTwo levelTwos in (updatedLevelTwos, False) @@ -166,26 +177,26 @@ leastRecentlyUsedIndex logGroups = & maybe 0 fst -updateOrCreateLogGroup :: V.Vector LogGroup -> V.Vector Text -> Text -> Text -> UTCTime -> DrainConfig -> (V.Vector LogGroup, Bool) -updateOrCreateLogGroup logGroups tokensVec logId logContent now config = +updateOrCreateLogGroup :: V.Vector LogGroup -> V.Vector Text -> Text -> Bool -> Text -> Text -> UTCTime -> DrainConfig -> (V.Vector LogGroup, Bool) +updateOrCreateLogGroup logGroups tokensVec logId isSampleLog logContent field now config = case findBestMatch logGroups tokensVec (similarityThreshold config) of Just (index, bestGroup) -> let updatedTemplate = if V.length tokensVec == V.length (template bestGroup) then mergeTemplates (template bestGroup) tokensVec (wildcardToken config) else template bestGroup - updatedGroup = updateLogGroupWithTemplate bestGroup updatedTemplate logId logContent now + updatedGroup = updateLogGroupWithTemplate bestGroup 
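+ -- the matched group absorbs this log; its exampleLog is refreshed only when isSampleLog is set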
updatedTemplate logId isSampleLog logContent field now updatedGroups = logGroups V.// [(index, updatedGroup)] in (updatedGroups, True) Nothing -> if V.length logGroups >= maxLogGroups config then let victimIdx = leastRecentlyUsedIndex logGroups - newGroup = createLogGroup tokensVec (unwords $ V.toList tokensVec) logId now + newGroup = createLogGroup tokensVec (unwords $ V.toList tokensVec) logId field now updatedGroups = logGroups V.// [(victimIdx, newGroup)] in (updatedGroups, False) else - let newGroup = createLogGroup tokensVec (unwords $ V.toList tokensVec) logId now + let newGroup = createLogGroup tokensVec (unwords $ V.toList tokensVec) logId field now updatedGroups = V.cons newGroup logGroups in (updatedGroups, False) @@ -213,23 +224,37 @@ mergeTemplates template1 template2 wildcardToken = -- Update log group with new template and log information -updateLogGroupWithTemplate :: LogGroup -> V.Vector Text -> Text -> Text -> UTCTime -> LogGroup -updateLogGroupWithTemplate group' newTemplate logId originalLog now = +updateLogGroupWithTemplate :: LogGroup -> V.Vector Text -> Text -> Bool -> Text -> Text -> UTCTime -> LogGroup +updateLogGroupWithTemplate group' newTemplate logId isSampleLog originalLog field now = group' { template = newTemplate , templateStr = unwords $ V.toList newTemplate + , exampleLog = if isSampleLog then originalLog else exampleLog group' , logIds = V.cons logId (logIds group') + , fieldPath = field , frequency = frequency group' + 1 , lastSeen = now } -getAllLogGroups :: DrainTree -> V.Vector (Text, V.Vector Text) +getAllLogGroups :: DrainTree -> V.Vector (Text, Text, Text, V.Vector Text) getAllLogGroups tree = let levelOnes = children tree levelTwos = V.concatMap nodes levelOnes allLogGroups = V.concatMap logGroups levelTwos - in V.map (\grp -> (templateStr grp, logIds grp)) allLogGroups + in V.map (\grp -> (grp.exampleLog, grp.fieldPath, templateStr grp, logIds grp)) allLogGroups + + +generateDrainTokens :: T.Text -> (V.Vector T.Text, Text) +generateDrainTokens content + | looksLikeJson content = + fromMaybe fallback $ do + jsonValue <- AE.decodeStrict' (encodeUtf8 content) + (msg, key) <- extractMessageAndTargetKeyFromLog jsonValue + pure (tokenizeUnstructured msg, key) + | otherwise = fallback + where + fallback = (tokenizeUnstructured content, "body") looksLikeJson :: T.Text -> Bool @@ -238,30 +263,85 @@ looksLikeJson t = || ("[" `T.isInfixOf` t && "]" `T.isSuffixOf` t) -tokenizeJsonLike :: T.Text -> [T.Text] -tokenizeJsonLike txt +-- | Tokenize unstructured log content: normalize known value formats +-- (timestamps, numbers, emails, IPs, ...) via 'replaceAllFormats', then +-- split on whitespace. +tokenizeUnstructured :: T.Text -> V.Vector T.Text +tokenizeUnstructured content = + let preprocessed = replaceAllFormats content + in V.fromList $ words preprocessed + + +-- | Tokenize text handling quotes, embedded JSON, and key=value patterns: +-- - Quoted strings kept as single tokens +-- - Embedded JSON arrays → [*] +-- - Embedded JSON objects → {*} +-- - Key=value patterns → key=<*> +tokenize :: T.Text -> [T.Text] +tokenize txt | T.null txt = [] - | otherwise = go txt + | otherwise = go (T.stripStart txt) where go t | T.null t = [] - | T.head t `elem` ['{', '}', '[', ']', ',', ':'] = - let c = one (T.head t) - in c : go (T.tail t) + -- Handle quoted strings as single token | T.head t == '"' = - let (quoted, rest) = T.breakOn "\"" (T.tail t) - token = "\"" <> quoted <> "\"" -- include the quotes - in token : go (T.drop 1 rest) - | isSpace (T.head t) = - go (T.dropWhile isSpace t) + let (quoted, rest) = extractQuoted '"' (T.tail t) + in ("\"" <> quoted <> "\"") : go
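+ -- the quotes are kept so the whole quoted span stays a single token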
(T.stripStart rest) + | T.head t == '\'' = + let (quoted, rest) = extractQuoted '\'' (T.tail t) + in ("'" <> quoted <> "'") : go (T.stripStart rest) + -- Handle embedded JSON array + | T.head t == '[' = + let (_, rest) = extractBracketedContent '[' ']' t + in "[*]" : go (T.stripStart rest) + -- Handle embedded JSON object + | T.head t == '{' = + let (_, rest) = extractBracketedContent '{' '}' t + in "{*}" : go (T.stripStart rest) + -- Handle regular token (may contain key=value) | otherwise = - let (chunk, rest) = T.span (\c -> not (isSpace c) && notElem c ['{', '}', '[', ']', ',', ':']) t - in chunk : go rest - - -generateDrainTokens :: T.Text -> V.Vector T.Text -generateDrainTokens content = - let replaced = replaceAllFormats content - in if looksLikeJson replaced - then V.fromList (tokenizeJsonLike replaced) - else V.fromList $ words replaced + let (token, rest) = T.break isTokenDelimiter t + in if T.null token + then go (T.drop 1 t) + else processToken token ++ go (T.stripStart rest) + + isTokenDelimiter c = c `elem` [' ', '\t', '\n', '\r', '[', '{'] + + -- Process a token, handling key=value patterns + processToken :: T.Text -> [T.Text] + processToken token = + case T.breakOn "=" token of + (key, rest) + | not (T.null rest) && not (T.null key) && T.length rest > 1 -> + -- Has key=value pattern: keep key=<*> + [key <> "=<*>"] + | otherwise -> [token] + + +-- | Extract content within quotes, handling escaped quotes +extractQuoted :: Char -> T.Text -> (T.Text, T.Text) +extractQuoted quoteChar txt = go txt "" + where + go t acc + | T.null t = (acc, t) + | T.head t == '\\' && T.length t > 1 && T.index t 1 == quoteChar = + go (T.drop 2 t) (acc <> one '\\' <> one quoteChar) + | T.head t == quoteChar = (acc, T.tail t) + | otherwise = go (T.tail t) (acc <> one (T.head t)) + + +-- | Extract content within brackets, handling nested brackets +extractBracketedContent :: Char -> Char -> T.Text -> (T.Text, T.Text) +extractBracketedContent open close txt + | T.null txt = ("", txt) + | T.head txt /= open = ("", txt) + | otherwise = go (T.tail txt) "" (1 :: Int) + where + go t acc depth + | T.null t = (acc, t) + | depth == 0 = (acc, t) + | T.head t == open = go (T.tail t) (acc <> one open) (depth + 1) + | T.head t == close = + if depth == 1 + then (acc, T.tail t) + else go (T.tail t) (acc <> one close) (depth - 1) + | otherwise = go (T.tail t) (acc <> one (T.head t)) depth diff --git a/src/Utils.hs b/src/Utils.hs index 4bfbebefb..21f61ba5f 100644 --- a/src/Utils.hs +++ b/src/Utils.hs @@ -45,6 +45,8 @@ module Utils ( prettyPrintCount, formatWithCommas, extractMessageFromLog, + extractMessageAndTargetKeyFromLog, + messageKeys, -- Fill color helpers statusFillColor, statusFillColorText, @@ -738,6 +740,18 @@ extractMessageFromLog (AE.Object obj) = extractMessageFromLog _ = Nothing +extractMessageAndTargetKeyFromLog :: Value -> Maybe (Text, Text) +extractMessageAndTargetKeyFromLog (AE.Object obj) = + listToMaybe [v | key <- messageKeys, Just v <- [extractValue key obj]] + where + extractValue :: T.Text -> Object -> Maybe (T.Text, T.Text) + extractValue key km = case Data.Aeson.KeyMap.lookup (fromText key) km of + Just (AE.String s) -> Just (s, key) + Just val -> Just (toText $ show val, key) + Nothing -> Nothing +extractMessageAndTargetKeyFromLog _ = Nothing + + -- | Get fill color class for HTTP status codes statusFillColor :: Int -> Text statusFillColor code diff --git a/static/migrations/0026_log_patterns.sql b/static/migrations/0026_log_patterns.sql new file mode 100644 index 000000000..a8f7cb94c --- 
/dev/null +++ b/static/migrations/0026_log_patterns.sql @@ -0,0 +1,73 @@ +BEGIN; + +CREATE TABLE IF NOT EXISTS apis.log_patterns ( + id BIGSERIAL PRIMARY KEY, + project_id UUID NOT NULL REFERENCES projects.projects(id) ON DELETE CASCADE, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + + log_pattern TEXT NOT NULL, + pattern_hash TEXT NOT NULL, + + service_name TEXT, + log_level TEXT, + sample_message TEXT, + state TEXT NOT NULL DEFAULT 'new', -- 'new', 'acknowledged', 'ignored' + first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + occurrence_count BIGINT NOT NULL DEFAULT 1, + + acknowledged_by UUID REFERENCES users.users(id), + acknowledged_at TIMESTAMPTZ, + + -- Baseline for volume spike detection + baseline_state TEXT NOT NULL DEFAULT 'learning', -- 'learning', 'established' + baseline_volume_hourly_mean FLOAT, + baseline_volume_hourly_stddev FLOAT, + baseline_samples INT NOT NULL DEFAULT 0, + baseline_updated_at TIMESTAMPTZ, + + UNIQUE(project_id, pattern_hash) +); + +SELECT manage_updated_at('apis.log_patterns'); + +CREATE INDEX IF NOT EXISTS idx_log_patterns_project ON apis.log_patterns(project_id); +CREATE INDEX IF NOT EXISTS idx_log_patterns_project_state ON apis.log_patterns(project_id, state); +CREATE INDEX IF NOT EXISTS idx_log_patterns_last_seen ON apis.log_patterns(project_id, last_seen_at DESC); +CREATE INDEX IF NOT EXISTS idx_log_patterns_service ON apis.log_patterns(project_id, service_name); + +CREATE OR REPLACE FUNCTION apis.new_log_pattern_proc() RETURNS trigger AS $$ +BEGIN + IF TG_WHEN <> 'AFTER' THEN + RAISE EXCEPTION 'apis.new_log_pattern_proc() may only run as an AFTER trigger'; + END IF; + INSERT INTO background_jobs (run_at, status, payload) + VALUES ( + NOW(), + 'queued', + jsonb_build_object( + 'tag', 'NewLogPatternDetected', + 'contents', jsonb_build_array(NEW.project_id, NEW.pattern_hash) + ) + ); + RETURN NULL; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE TRIGGER log_pattern_created_notify AFTER INSERT ON apis.log_patterns FOR EACH ROW EXECUTE PROCEDURE apis.new_log_pattern_proc(); + +ALTER TYPE apis.issue_type ADD VALUE IF NOT EXISTS 'log_pattern'; +ALTER TYPE apis.issue_type ADD VALUE IF NOT EXISTS 'log_pattern_rate_change'; +ALTER TABLE apis.log_patterns ADD COLUMN trace_id TEXT; + +ALTER TABLE apis.issues ADD COLUMN IF NOT EXISTS source_type TEXT NOT NULL DEFAULT ''; +ALTER TABLE apis.issues ADD COLUMN IF NOT EXISTS target_hash TEXT NOT NULL DEFAULT ''; +ALTER TABLE apis.issues ADD COLUMN IF NOT EXISTS environment TEXT; + +-- Add field_path column and update unique constraint +ALTER TABLE apis.log_patterns ADD COLUMN IF NOT EXISTS field_path TEXT NOT NULL DEFAULT 'body'; +ALTER TABLE apis.log_patterns DROP CONSTRAINT IF EXISTS log_patterns_project_id_pattern_hash_key; +ALTER TABLE apis.log_patterns ADD CONSTRAINT log_patterns_project_level_path_hash_key UNIQUE(project_id, log_level, field_path, pattern_hash); + +COMMIT; diff --git a/static/public/assets/css/tailwind.css b/static/public/assets/css/tailwind.css index be2ea8e1e..bdc3b9b56 100644 --- a/static/public/assets/css/tailwind.css +++ b/static/public/assets/css/tailwind.css @@ -526,10 +526,13 @@ } /* Enhanced focus visibility with offset */ - :focus-visible { + :focus-visible:not(.no-focus-ring) { outline: 2px solid var(--color-strokeFocus); outline-offset: 2px; } + .no-focus-ring:focus-visible { + outline: none !important; + } .btn:focus-visible, .input:focus-visible, .select:focus-visible, 
.textarea:focus-visible { outline-offset: 1px; } @@ -1244,14 +1247,17 @@ body, :host { } /* Enhanced focus states for better keyboard navigation visibility */ - :focus-visible { + :focus-visible:not(.no-focus-ring) { outline: 2px solid var(--color-strokeFocus); outline-offset: 2px; } + .no-focus-ring:focus-visible { + outline: none !important; + } /* High contrast focus for forced colors mode (Windows high contrast) */ @media (forced-colors: active) { - :focus-visible { + :focus-visible:not(.no-focus-ring) { outline: 3px solid CanvasText; } } diff --git a/test/unit/Pkg/DrainSpec.hs b/test/unit/Pkg/DrainSpec.hs index c066cc094..0b243081c 100644 --- a/test/unit/Pkg/DrainSpec.hs +++ b/test/unit/Pkg/DrainSpec.hs @@ -4,7 +4,7 @@ import Test.Hspec import Data.Vector qualified as V import Utils qualified import Data.Time -import Pkg.Drain +import Pkg.Drain (DrainTree, emptyDrainTree, updateTreeWithLog, getAllLogGroups, tokenize, extractQuoted, extractBracketedContent) import Relude -- Helper function to create a test time @@ -24,36 +24,111 @@ processNewLog logId logContent now tree = do firstToken = if V.null tokensVec then "" else V.head tokensVec in if tokenCount == 0 then tree -- Skip empty logs - else updateTreeWithLog tree tokenCount firstToken tokensVec logId logContent now + else updateTreeWithLog tree tokenCount firstToken tokensVec logId True logContent "body" now processBatch :: V.Vector (Text, Text) -> UTCTime -> DrainTree -> DrainTree processBatch logBatch now initialTree = do V.foldl (\tree (logId, logContent) -> processNewLog logId logContent now tree) initialTree logBatch spec :: Spec -spec = describe "DRAIN updateTreeWithLog" $ do - describe "End to End drain tree test" $ do +spec = do + describe "tokenize" $ do + it "handles basic tokenization" $ + tokenize "hello world" `shouldBe` ["hello", "world"] + + it "keeps quoted strings as single tokens" $ + tokenize "Added products \"one, two, three\" to cart" + `shouldBe` ["Added", "products", "\"one, two, three\"", "to", "cart"] + + it "handles single quoted strings at start of token" $ + tokenize "'hello world' is a greeting" + `shouldBe` ["'hello world'", "is", "a", "greeting"] + + it "replaces key=value pattern values with <*>" $ + tokenize "userId=12345 status=active" + `shouldBe` ["userId=<*>", "status=<*>"] + + it "converts embedded JSON arrays to [*]" $ + tokenize "Processing [1, 2, 3] items" + `shouldBe` ["Processing", "[*]", "items"] + + it "converts embedded JSON objects to {*}" $ + tokenize "Config {\"debug\": true} loaded" + `shouldBe` ["Config", "{*}", "loaded"] + + it "handles nested brackets" $ + tokenize "data [[1,2],[3,4]] end" + `shouldBe` ["data", "[*]", "end"] + + it "handles combined example" $ + tokenize "GET /api/users [200] {\"count\": 5}" + `shouldBe` ["GET", "/api/users", "[*]", "{*}"]
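+ + -- illustrative extra check: the key in a key=value token is kept while its value collapses + it "keeps keys for mixed key=value content" $ + tokenize "retry attempt=3 failed" `shouldBe` ["retry", "attempt=<*>", "failed"]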
']' "[[1,2],[3,4]] end" + `shouldBe` ("[1,2],[3,4]", " end") + + it "extracts JSON object" $ + extractBracketedContent '{' '}' "{\"key\": \"value\"} more" + `shouldBe` ("\"key\": \"value\"", " more") + + it "handles nested objects" $ + extractBracketedContent '{' '}' "{\"a\": {\"b\": 1}} rest" + `shouldBe` ("\"a\": {\"b\": 1}", " rest") + + it "returns empty for no opening bracket" $ + extractBracketedContent '[' ']' "no bracket" + `shouldBe` ("", "no bracket") + + it "returns all content for unclosed bracket" $ + extractBracketedContent '[' ']' "[unclosed" + `shouldBe` ("unclosed", "") + + describe "DRAIN updateTreeWithLog" $ do + describe "End to End drain tree test" $ do it "should get correct log pattern for HTTP requests" $ do let initialTree = emptyDrainTree updatedTree = processBatch (V.fromList basicHttpLogs) (testTimeOffset 0) initialTree logGroups = getAllLogGroups updatedTree length logGroups `shouldBe` 3 - let patterns = V.map fst logGroups - V.toList patterns `shouldMatchList` + let patterns = V.map (\(_, template, _) -> template) logGroups + V.toList patterns `shouldMatchList` [ "DELETE /api/users/{integer} HTTP/{float} {integer}" , "POST /api/users HTTP/{float} {integer}" , "GET <*> HTTP/{float} {integer}" - ] - let log1 = V.find (\(tmp, logIds) -> tmp == "GET <*> HTTP/{float} {integer}") logGroups + ] + let log1 = V.find (\(_, tmp, _) -> tmp == "GET <*> HTTP/{float} {integer}") logGroups case log1 of - Just (_, lg) -> lg `shouldBe` V.fromList ["log5", "log3", "log2", "log1"] + Just (_, _, lg) -> lg `shouldBe` V.fromList ["log5", "log3", "log2", "log1"] Nothing -> error "log1 pattern not found" - let log2 = V.find (\(tmp, logIds) -> tmp == "POST /api/users HTTP/{float} {integer}") logGroups + let log2 = V.find (\(_, tmp, _) -> tmp == "POST /api/users HTTP/{float} {integer}") logGroups case log2 of - Just (_, lg) -> lg `shouldBe` V.fromList ["log4"] + Just (_, _, lg) -> lg `shouldBe` V.fromList ["log4"] Nothing -> error "log2 pattern not found" - let log3 = V.find (\(tmp, logIds) -> tmp == "DELETE /api/users/{integer} HTTP/{float} {integer}") logGroups + let log3 = V.find (\(_, tmp, _) -> tmp == "DELETE /api/users/{integer} HTTP/{float} {integer}") logGroups case log3 of - Just (_, lg) -> lg `shouldBe` V.fromList ["log6"] + Just (_, _, lg) -> lg `shouldBe` V.fromList ["log6"] Nothing -> error "log3 pattern not found" pass @@ -61,23 +136,23 @@ spec = describe "DRAIN updateTreeWithLog" $ do let initialTree = emptyDrainTree updatedTree = processBatch (V.fromList databaseLogs) (testTimeOffset 0) initialTree logGroups = getAllLogGroups updatedTree - let patterns = V.map fst logGroups + let patterns = V.map (\(_, template, _) -> template) logGroups V.toList patterns `shouldMatchList` [ "Connected to database <*>" , "Database query executed in {integer}ms" , "Connection pool exhausted max={integer} active={integer}" ] - let log1 = V.find (\(tmp, logIds) -> tmp == "Connected to database <*>") logGroups + let log1 = V.find (\(_, tmp, _) -> tmp == "Connected to database <*>") logGroups case log1 of - Just (_, lg) -> lg `shouldBe` V.fromList ["db3", "db2", "db1"] + Just (_, _, lg) -> lg `shouldBe` V.fromList ["db3", "db2", "db1"] Nothing -> error "db1 pattern not found" - let log2 = V.find (\(tmp, logIds) -> tmp == "Database query executed in {integer}ms") logGroups + let log2 = V.find (\(_, tmp, _) -> tmp == "Database query executed in {integer}ms") logGroups case log2 of - Just (_, lg) -> lg `shouldBe` V.fromList ["db6", "db5", "db4"] + Just (_, _, lg) -> lg `shouldBe` V.fromList 
["db6", "db5", "db4"] Nothing -> error "db2 pattern not found" - let log3 = V.find (\(tmp, logIds) -> tmp == "Connection pool exhausted max={integer} active={integer}") logGroups + let log3 = V.find (\(_, tmp, _) -> tmp == "Connection pool exhausted max={integer} active={integer}") logGroups case log3 of - Just (_, lg) -> lg `shouldBe` V.fromList ["db8", "db7"] + Just (_, _, lg) -> lg `shouldBe` V.fromList ["db8", "db7"] Nothing -> error "db3 pattern not found" pass @@ -86,28 +161,28 @@ spec = describe "DRAIN updateTreeWithLog" $ do updatedTree = processBatch (V.fromList startupLogs) (testTimeOffset 0) initialTree logGroups = getAllLogGroups updatedTree length logGroups `shouldBe` 4 - let patterns = V.map fst logGroups + let patterns = V.map (\(_, template, _) -> template) logGroups V.toList patterns `shouldMatchList` [ "Initializing Redis connection <*>" , "Application ready to serve requests" , "Loading configuration from <*>" , "Starting application on port {integer}" ] - let log1 = V.find (\(tmp, logIds) -> tmp == "Starting application on port {integer}") logGroups + let log1 = V.find (\(_, tmp, _) -> tmp == "Starting application on port {integer}") logGroups case log1 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["start3", "start2", "start1"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["start3", "start2", "start1"] Nothing -> error "start1 pattern not found" - let log2 = V.find (\(tmp, logIds) -> tmp == "Loading configuration from <*>") logGroups + let log2 = V.find (\(_, tmp, _) -> tmp == "Loading configuration from <*>") logGroups case log2 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["start4", "start5"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["start4", "start5"] Nothing -> error "start4 pattern not found" - let log3 = V.find (\(tmp, logIds) -> tmp == "Initializing Redis connection <*>") logGroups + let log3 = V.find (\(_, tmp, _) -> tmp == "Initializing Redis connection <*>") logGroups case log3 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["start6", "start7"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["start6", "start7"] Nothing -> error "start6 pattern not found" - let log4 = V.find (\(tmp, logIds) -> tmp == "Application ready to serve requests") logGroups + let log4 = V.find (\(_, tmp, _) -> tmp == "Application ready to serve requests") logGroups case log4 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["start9", "start8"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["start9", "start8"] Nothing -> error "start8 pattern not found" pass @@ -116,28 +191,28 @@ spec = describe "DRAIN updateTreeWithLog" $ do updatedTree = processBatch (V.fromList errorLogs) (testTimeOffset 0) initialTree logGroups = getAllLogGroups updatedTree length logGroups `shouldBe` 4 - let patterns = V.map fst logGroups + let patterns = V.map (\(_, template, _) -> template) logGroups V.toList patterns `shouldMatchList` [ "ERROR Failed to authenticate user {email}" , "ERROR Database connection timeout after {integer}ms" , "WARN Retrying failed request attempt {integer} of {integer}" , "FATAL Out of memory heap size {integer}MB exceeded" ] - let log1 = V.find (\(tmp, logIds) -> tmp == "WARN Retrying failed request attempt {integer} of {integer}") logGroups + let log1 = V.find (\(_, tmp, _) -> tmp == "WARN Retrying failed request attempt {integer} of {integer}") logGroups case log1 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["err7", "err6", "err5"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["err7", "err6", "err5"] Nothing -> error "err1 
pattern not found" - let log2 = V.find (\(tmp, logIds) -> tmp == "ERROR Failed to authenticate user {email}") logGroups + let log2 = V.find (\(_, tmp, _) -> tmp == "ERROR Failed to authenticate user {email}") logGroups case log2 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["err2", "err1"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["err2", "err1"] Nothing -> error "err2 pattern not found" - let log3 = V.find (\(tmp, logIds) -> tmp == "ERROR Database connection timeout after {integer}ms") logGroups + let log3 = V.find (\(_, tmp, _) -> tmp == "ERROR Database connection timeout after {integer}ms") logGroups case log3 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["err4", "err3"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["err4", "err3"] Nothing -> error "err3 pattern not found" - let log4 = V.find (\(tmp, logIds) -> tmp == "FATAL Out of memory heap size {integer}MB exceeded") logGroups + let log4 = V.find (\(_, tmp, _) -> tmp == "FATAL Out of memory heap size {integer}MB exceeded") logGroups case log4 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["err9", "err8"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["err9", "err8"] Nothing -> error "pattern not found" pass @@ -145,49 +220,49 @@ spec = describe "DRAIN updateTreeWithLog" $ do let initialTree = emptyDrainTree updatedTree = processBatch (V.fromList timestampedLogs) (testTimeOffset 0) initialTree logGroups = getAllLogGroups updatedTree - let patterns = V.map fst logGroups + let patterns = V.map (\(_, template, _) -> template) logGroups V.toList patterns `shouldMatchList` [ "{YYYY-MM-DDThh:mm:ss.sTZD} INFO User <*> <*> <*>" , "{YYYY-MM-DDThh:mm:ss.sTZD} ERROR Invalid token provided <*>" , "{YYYY-MM-DDThh:mm:ss.sTZD} WARN Rate limit exceeded client={ipv4}" ] - let log1 = V.find (\(tmp, logIds) -> tmp == "{YYYY-MM-DDThh:mm:ss.sTZD} INFO User <*> <*> <*>") logGroups + let log1 = V.find (\(_, tmp, _) -> tmp == "{YYYY-MM-DDThh:mm:ss.sTZD} INFO User <*> <*> <*>") logGroups case log1 of - Just (_, lg) -> do + Just (_, _, lg) -> do "ts1" `V.elem` lg `shouldBe` True "ts2" `V.elem` lg `shouldBe` True Nothing -> error "ts1 pattern not found" - let log3 = V.find (\(tmp, logIds) -> tmp == "{YYYY-MM-DDThh:mm:ss.sTZD} ERROR Invalid token provided <*>") logGroups + let log3 = V.find (\(_, tmp, _) -> tmp == "{YYYY-MM-DDThh:mm:ss.sTZD} ERROR Invalid token provided <*>") logGroups case log3 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["ts5", "ts6"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["ts5", "ts6"] Nothing -> error "ts5 pattern not found" - let log4 = V.find (\(tmp, logIds) -> tmp == "{YYYY-MM-DDThh:mm:ss.sTZD} WARN Rate limit exceeded client={ipv4}") logGroups + let log4 = V.find (\(_, tmp, _) -> tmp == "{YYYY-MM-DDThh:mm:ss.sTZD} WARN Rate limit exceeded client={ipv4}") logGroups case log4 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["ts7", "ts8"] + Just (_, _, lg) -> V.toList lg `shouldMatchList` ["ts7", "ts8"] Nothing -> error "ts7 pattern not found" it "should get correct log patterns for microservice logs" $ do let initialTree = emptyDrainTree updatedTree = processBatch (V.fromList microserviceLogs) (testTimeOffset 0) initialTree logGroups = getAllLogGroups updatedTree - let patterns = V.map fst logGroups + let patterns = V.map (\(_, template, _) -> template) logGroups V.toList patterns `shouldMatchList` [ "payment-service processing payment amount={float} <*>" , "auth-service JWT validation successful for user <*>" , "user-service database query SELECT * FROM users WHERE id={integer} 
took {integer}ms" , "user-service received request <*> <*> <*>" ] - let log1 = V.find (\(tmp, logIds) -> tmp == "user-service received request <*> <*> <*>" ) logGroups + let log1 = V.find (\(_, _, tmp, _) -> tmp == "user-service received request <*> <*> <*>" ) logGroups case log1 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["svc2", "svc1"] + Just (_, _, _, lg) -> V.toList lg `shouldMatchList` ["svc2", "svc1"] Nothing -> error "svc1 pattern not found" - let log3 = V.find (\(tmp, logIds) -> tmp == "user-service database query SELECT * FROM users WHERE id={integer} took {integer}ms") logGroups + let log3 = V.find (\(_, _, tmp, _) -> tmp == "user-service database query SELECT * FROM users WHERE id={integer} took {integer}ms") logGroups case log3 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["svc4", "svc5"] + Just (_, _, _, lg) -> V.toList lg `shouldMatchList` ["svc4", "svc5"] Nothing -> error "svc4 pattern not found" - let log4 = V.find (\(tmp, logIds) -> tmp == "auth-service JWT validation successful for user <*>") logGroups + let log4 = V.find (\(_, _, tmp, _) -> tmp == "auth-service JWT validation successful for user <*>") logGroups case log4 of - Just (_, lg) -> V.toList lg `shouldMatchList` ["svc7", "svc8"] + Just (_, _, _, lg) -> V.toList lg `shouldMatchList` ["svc7", "svc8"] Nothing -> error "svc7 pattern not found" basicHttpLogs :: [(Text, Text)]