Skip to content

Commit

Permalink
* updated documentation
Browse files Browse the repository at this point in the history
* minor webui security fix
* this is final StatsAgg 1.3 release
  • Loading branch information
Jeffrey Schmidt committed May 20, 2015
1 parent 907ac33 commit dcb2ebe
Show file tree
Hide file tree
Showing 21 changed files with 3,575 additions and 3,416 deletions.
2 changes: 1 addition & 1 deletion ChangeLog.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
StatsAgg 1.3 - released 2015-05-xx
StatsAgg 1.3 - released 2015-05-20
* Added new properties to an alert -- 'first triggered at' timestamp for caution & danger
* Resent email notifications include a field that says how long the alert has been triggered
* New WebUI view -- 'Metric Group - Alert Associations'. This page allows a user to see every alert that is associated with a particular 'metric group'.
Expand Down
27 changes: 15 additions & 12 deletions dev/Todo.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
StatsAgg 1.3
* Final QA checks & documentation updates

Short/Medium term
* Optimize Graphite metric formatter
StatsAgg 1.4
* updateAlertMetricRecentValues -- turn metricTimestampsAndValues into a list
* metricKeysLastSeenTimestamp & metricKeysLastSeenTimestamp_UpdateOnResend -- one Map, defined as Map<MetricKey,Long[ts1,ts2]>
* Support 'no timestamp' on Graphite metrics
* Optimize/Improve Graphite metric formatter
* Native InfluxDB support
* Support output via OpenTSDB HTTP/JSON
* Let the user control the number of metrics to return on 'Regex Tester' and 'Metric Group - Metric Key Associations'

Short/Medium term
* Manual injection of metrics via a new servlet w/ a WebUI component
* Multi-thread StatsD aggregation routine
* Let the user control the number of metrics to return on 'Regex Tester' and 'Metric Group - Metric Key Associations'
* Apply column-sort to WebUI tables via parameter in URI
* Control what StatsD 'timer' fields are outputted (so people can ignore the more useless ones)
* Debug mode for "details" pages (include additional details)
* Advanced options
* Maximum concurrently running output-threads
* Incoming metric rate limits
* Output-thread timeout
* (optional) Limit metric-key length displayed in StatsAgg WebUI
* (optional) Limit metric group association count
* (optional) Limit the max # of datapoints kept in memory for a single metric-key
* (optional) Maximum concurrently running output-threads
* (optional) Incoming metric rate limits
* (optional) Output-thread timeout
* Cleanup metrics after... (currently hardcoded to 24hrs)

Long-term / Wishlist / Brainstorming
Expand Down Expand Up @@ -43,6 +48,4 @@ Long-term / Wishlist / Brainstorming
* Little pay-off for the effort involved.
* Convert to a regular Java app & use embedded Tomcat for the WebUI
* Simplifies installation, but removes all the nice stuff we get from Tomcat.
* Would need to be a separate git repo
* When altering alerts & 'stuff is in an alerted status', don't null the alert status if the alert status is the same.
* May require substantial refactoring
* Would need to be a separate git repo
Binary file modified docs/manual.pdf
Binary file not shown.
24 changes: 17 additions & 7 deletions src/main/java/com/pearson/statsagg/alerts/AlertThread.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
import com.pearson.statsagg.metric_aggregation.threads.SendMetricsToOpenTsdbThread;
import com.pearson.statsagg.utilities.MathUtilities;
import com.pearson.statsagg.utilities.StackTrace;
import com.pearson.statsagg.utilities.StringUtilities;
import com.pearson.statsagg.utilities.Threads;
import com.pearson.statsagg.webui.StatsAggHtmlFramework;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
Expand Down Expand Up @@ -348,7 +348,7 @@ private void alertRecoveryRoutine_HasReachedPreviousState() {

if (hasAlertReachedPreviousState) {
pendingCautionAlertsByAlertId_.remove(alertId);
String cleanAlertName = StatsAggHtmlFramework.removeNewlinesFromString(alert.getName(), ' ');
String cleanAlertName = StringUtilities.removeNewlinesFromString(alert.getName(), ' ');
logger.info("Routine=AlertRecovery, AlertName=\"" + cleanAlertName + "\", Message=\"Caution alerting enabled after reaching previous window state\"");
}
}
Expand All @@ -369,7 +369,7 @@ private void alertRecoveryRoutine_HasReachedPreviousState() {
if (hasAlertReachedPreviousState) {
Alert alert = pendingDangerAlertsByAlertId_.get(alertId);
pendingDangerAlertsByAlertId_.remove(alertId);
String cleanAlertName = StatsAggHtmlFramework.removeNewlinesFromString(alert.getName(), ' ');
String cleanAlertName = StringUtilities.removeNewlinesFromString(alert.getName(), ' ');
logger.info("Routine=AlertRecovery, AlertName=\"" + cleanAlertName + "\", Message=\"Danger alerting enabled after reaching previous window state\"");
}
}
Expand All @@ -393,7 +393,7 @@ private void alertRecoveryRoutine_HasReachedWindowDuration(long applicationStart

if ((alert.getCautionWindowDuration() != null) && (timeSinceStartup >= alert.getCautionWindowDuration())) {
pendingCautionAlertsByAlertId_.remove(alertId);
String cleanAlertName = StatsAggHtmlFramework.removeNewlinesFromString(alert.getName(), ' ');
String cleanAlertName = StringUtilities.removeNewlinesFromString(alert.getName(), ' ');
logger.info("Routine=AlertRecovery, AlertName=\"" + cleanAlertName + "\", Message=\"Caution alerting enabled after reaching window duration (" + timeSinceStartup + "ms)\"");
}
}
Expand All @@ -407,7 +407,7 @@ private void alertRecoveryRoutine_HasReachedWindowDuration(long applicationStart

if ((alert.getDangerWindowDuration() != null) && (timeSinceStartup >= alert.getDangerWindowDuration())) {
pendingDangerAlertsByAlertId_.remove(alertId);
String cleanAlertName = StatsAggHtmlFramework.removeNewlinesFromString(alert.getName(), ' ');
String cleanAlertName = StringUtilities.removeNewlinesFromString(alert.getName(), ' ');
logger.info("Routine=AlertRecovery, AlertName=\"" + cleanAlertName + "\", Message=\"Danger alerting enabled after reaching window duration (" + timeSinceStartup + "ms)\"");
}
}
Expand Down Expand Up @@ -729,7 +729,12 @@ private static void determineAlertStatus_Caution(Alert alert, AlertThread alertT
positiveAlertReasons.put(metricKey, "Reached 'Stop Tracking' time limit");
}
}
else if (!isAvailabilityAlert_And_HitStopTrackingLimit && (availabilityAlert_TimeSinceLastSeen == null)) { // a recent metric value has been detected -- so the availability alert is not active
else if (!isAvailabilityAlert_And_HitStopTrackingLimit && (availabilityAlert_TimeSinceLastSeen == null) && (metricKeyLastSeenTimestamp == null)) { // the metric has been deleted
if (activeCautionAvailabilityMetricKeys != null) {
activeCautionAvailabilityMetricKeys.remove(metricKey);
}
}
else if (!isAvailabilityAlert_And_HitStopTrackingLimit && (availabilityAlert_TimeSinceLastSeen == null) && (metricKeyLastSeenTimestamp != null)) { // a recent metric value has been detected -- so the availability alert is not active
if (activeCautionAvailabilityMetricKeys != null) {
activeCautionAvailabilityMetricKeys.remove(metricKey);

Expand Down Expand Up @@ -797,7 +802,12 @@ private static void determineAlertStatus_Danger(Alert alert, AlertThread alertTh
positiveAlertReasons.put(metricKey, "Reached 'Stop Tracking' time limit");
}
}
else if (!isAvailabilityAlert_And_HitStopTrackingLimit && (availabilityAlert_TimeSinceLastSeen == null)) { // a recent metric value has been detected -- so the availability alert is not active
else if (!isAvailabilityAlert_And_HitStopTrackingLimit && (availabilityAlert_TimeSinceLastSeen == null) && (metricKeyLastSeenTimestamp == null)) { // the metric has been deleted
if (activeDangerAvailabilityMetricKeys != null) {
activeDangerAvailabilityMetricKeys.remove(metricKey);
}
}
else if (!isAvailabilityAlert_And_HitStopTrackingLimit && (availabilityAlert_TimeSinceLastSeen == null) && (metricKeyLastSeenTimestamp != null)) { // a recent metric value has been detected -- so the availability alert is not active
if (activeDangerAvailabilityMetricKeys != null) {
activeDangerAvailabilityMetricKeys.remove(metricKey);

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/pearson/statsagg/alerts/CleanupThread.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import com.pearson.statsagg.globals.GlobalVariables;
import com.pearson.statsagg.metric_aggregation.MetricTimestampAndValue;
import com.pearson.statsagg.utilities.StackTrace;
import com.pearson.statsagg.webui.StatsAggHtmlFramework;
import com.pearson.statsagg.utilities.StringUtilities;
import java.util.HashSet;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.codec.digest.DigestUtils;
Expand Down Expand Up @@ -223,7 +223,7 @@ private Set<String> cleanupGauges(Set<String> gaugeMetricKeys) {
gaugeMetricKeys_SuccessfullyDeleted.add(metricKey);
}
else {
String cleanBucketToForget = StatsAggHtmlFramework.removeNewlinesFromString(metricKey);
String cleanBucketToForget = StringUtilities.removeNewlinesFromString(metricKey);
logger.error("Failed deleting gauge from the database. Gauge=\"" + cleanBucketToForget + "\"");
}
}
Expand Down
13 changes: 7 additions & 6 deletions src/main/java/com/pearson/statsagg/alerts/EmailThread.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.pearson.statsagg.webui.StatsAggHtmlFramework;
import com.pearson.statsagg.utilities.EmailUtils;
import com.pearson.statsagg.utilities.StackTrace;
import com.pearson.statsagg.utilities.StringUtilities;
import java.util.Arrays;
import java.util.Collections;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -262,8 +263,8 @@ else if ((warningLevel_ == WARNING_LEVEL_DANGER) && (alert_.getDangerActiveAlert

body_ = body.toString();

String cleanSubject = StatsAggHtmlFramework.removeNewlinesFromString(subject_, ' ');
String cleanBody = StatsAggHtmlFramework.removeNewlinesFromString(body_, ' ');
String cleanSubject = StringUtilities.removeNewlinesFromString(subject_, ' ');
String cleanBody = StringUtilities.removeNewlinesFromString(body_, ' ');
logger.debug(cleanSubject + "\n" + cleanBody);
}

Expand All @@ -282,7 +283,7 @@ private void sendEmail(String smtpHost, int smtpPort, String username, String pa
String fromAddress, String fromName, List<String> toAddresses, String emailSubject, String emailBody) {

if (toAddresses.isEmpty()) {
String cleanSubject = StatsAggHtmlFramework.removeNewlinesFromString(emailSubject, ' ');
String cleanSubject = StringUtilities.removeNewlinesFromString(emailSubject, ' ');
logger.debug("Message=\"Failed to send email alert. No valid recipients.\", EmailSubject=\"" + cleanSubject + "\"");
return;
}
Expand All @@ -306,12 +307,12 @@ private void sendEmail(String smtpHost, int smtpPort, String username, String pa
email.setHtmlMsg(emailBody);
email.send();

String cleanSubject = StatsAggHtmlFramework.removeNewlinesFromString(emailSubject, ' ');
String cleanBody = StatsAggHtmlFramework.removeNewlinesFromString(emailBody, ' ');
String cleanSubject = StringUtilities.removeNewlinesFromString(emailSubject, ' ');
String cleanBody = StringUtilities.removeNewlinesFromString(emailBody, ' ');
logger.info("Message=\"Send email alert\", EmailSubject=\"" + cleanSubject + "\"" + ", EmailBody=\"" + cleanBody + "\"");
}
catch (Exception e) {
String cleanSubject = StatsAggHtmlFramework.removeNewlinesFromString(emailSubject, ' ');
String cleanSubject = StringUtilities.removeNewlinesFromString(emailSubject, ' ');
logger.error("Message=\"Failed to send email alert. SMTP failure.\", " + "EmailSubject=\"" + cleanSubject + "\", " +
e.toString() + System.lineSeparator() + StackTrace.getStringFromStackTrace(e));
}
Expand Down
22 changes: 11 additions & 11 deletions src/main/java/com/pearson/statsagg/globals/GlobalVariables.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,13 @@ public class GlobalVariables {
public final static ConcurrentHashMap<Long,OpenTsdbMetric> openTsdbMetrics = new ConcurrentHashMap<>();

// k=MetricKey, v="Aggregated metric object"
public final static ConcurrentHashMap<String,StatsdMetricAggregated> statsdMetricsAggregatedMostRecentValue = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<String,GraphiteMetric> graphiteAggregatedMetricsMostRecentValue = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<String,GraphiteMetric> graphitePassthroughMetricsMostRecentValue = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<String,OpenTsdbMetric> openTsdbMetricsMostRecentValue = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<String,StatsdMetricAggregated> statsdMetricsAggregatedMostRecentValue = new ConcurrentHashMap<>(16, 0.75f, 3);
public final static ConcurrentHashMap<String,GraphiteMetric> graphiteAggregatedMetricsMostRecentValue = new ConcurrentHashMap<>(16, 0.75f, 3);
public final static ConcurrentHashMap<String,GraphiteMetric> graphitePassthroughMetricsMostRecentValue = new ConcurrentHashMap<>(16, 0.75f, 3);
public final static ConcurrentHashMap<String,OpenTsdbMetric> openTsdbMetricsMostRecentValue = new ConcurrentHashMap<>(16, 0.75f, 3);

// k=MetricKey, v=Gauge (kept in sync with the database)
public final static ConcurrentHashMap<String,Gauge> statsdGaugeCache = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<String,Gauge> statsdGaugeCache = new ConcurrentHashMap<>(16, 0.75f, 3);

// k=MetricKey, v=MetricKey (k=v). The cleanup routine will clean up these metrics ASAP (regardless of whether they're tracked by an alert or not).
public final static ConcurrentHashMap<String,String> immediateCleanupMetrics = new ConcurrentHashMap<>();
Expand All @@ -76,10 +76,10 @@ public class GlobalVariables {
public final static ConcurrentHashMap<Integer,Byte> metricGroupChanges = new ConcurrentHashMap<>();

// k=MetricKey, v="The most recent timestamp at which this metric was received by this program"
public final static ConcurrentHashMap<String,Long> metricKeysLastSeenTimestamp = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<String,Long> metricKeysLastSeenTimestamp = new ConcurrentHashMap<>(16, 0.75f, 6);

// k=MetricKey, v="The most recent timestamp at which this metric was received by this program. Gets updated if the metric is configured to send 0 or previous value when no new metrics were received."
public final static ConcurrentHashMap<String,Long> metricKeysLastSeenTimestamp_UpdateOnResend = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<String,Long> metricKeysLastSeenTimestamp_UpdateOnResend = new ConcurrentHashMap<>(16, 0.75f, 6);

// k=MetricGroupId, v=Set<MetricKey> "is the metric key associated with a specific metric group? only include in the set if the association/match is true."
public final static ConcurrentHashMap<Integer,Set<String>> matchingMetricKeysAssociatedWithMetricGroup = new ConcurrentHashMap<>();
Expand All @@ -91,7 +91,7 @@ public class GlobalVariables {
public final static ConcurrentHashMap<Integer,String> mergedRegexsForMetricGroups = new ConcurrentHashMap<>();

// k=MetricKey, v=List<MetricTimestampAndValue> (should be -- synchronizedSet(HashSet<MetricTimestampAndValue>()))
public final static ConcurrentHashMap<String,Set<MetricTimestampAndValue>> recentMetricTimestampsAndValuesByMetricKey = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<String,Set<MetricTimestampAndValue>> recentMetricTimestampsAndValuesByMetricKey = new ConcurrentHashMap<>(16, 0.75f, 6);

// k=MetricGroupRegex-pattern, v="MetricGroupRegex-pattern compiled pattern. This is a cache for compiled regex patterns."
public final static ConcurrentHashMap<String,Pattern> metricGroupRegexPatterns = new ConcurrentHashMap<>();
Expand All @@ -112,13 +112,13 @@ public class GlobalVariables {
public final static ConcurrentHashMap<Integer,Alert> pendingDangerAlertsByAlertId = new ConcurrentHashMap<>();

// k=MetricKey, v=MetricKey
public static final ConcurrentHashMap<String,String> activeAvailabilityAlerts = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<String,String> activeAvailabilityAlerts = new ConcurrentHashMap<>();

// k=AlertId, v=Set<MetricKey>
public static final ConcurrentHashMap<Integer,Set<String>> activeCautionAvailabilityAlerts = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<Integer,Set<String>> activeCautionAvailabilityAlerts = new ConcurrentHashMap<>();

// k=AlertId, v=Set<MetricKey>
public static final ConcurrentHashMap<Integer,Set<String>> activeDangerAvailabilityAlerts = new ConcurrentHashMap<>();
public final static ConcurrentHashMap<Integer,Set<String>> activeDangerAvailabilityAlerts = new ConcurrentHashMap<>();

// k="{metricKey}-{alertId}", v='Alert routine calculated metric value'
public final static ConcurrentHashMap<String,BigDecimal> activeCautionAlertMetricValues = new ConcurrentHashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import com.pearson.statsagg.metric_aggregation.graphite.GraphiteMetric;
import com.pearson.statsagg.utilities.MathUtilities;
import com.pearson.statsagg.utilities.Threads;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
Expand Down
23 changes: 23 additions & 0 deletions src/main/java/com/pearson/statsagg/utilities/StringUtilities.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.pearson.statsagg.utilities;

import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -59,4 +60,26 @@ public static String createMergedRegex(List<String> regexes) {
return mergedRegexStringBuilder.toString();
}

/**
 * Strips every carriage-return ('\r') and line-feed ('\n') character from a string.
 *
 * Implemented with plain {@code String.replace} so that it matches the two-argument
 * overload of this method and needs no helper library for a trivial operation.
 *
 * @param inputString the string to clean; may be null or empty
 * @return the input itself when it is null or empty; otherwise the input with all
 *         '\r' and '\n' characters removed (all other characters are left untouched)
 */
public static String removeNewlinesFromString(String inputString) {

    // Nothing to strip -- hand null/empty straight back to the caller.
    if ((inputString == null) || inputString.isEmpty()) {
        return inputString;
    }

    // Each newline character is dropped outright (no replacement character is inserted).
    return inputString.replace("\r", "").replace("\n", "");
}

/**
 * Substitutes a caller-chosen character for every carriage-return ('\r') and
 * line-feed ('\n') character in a string.
 *
 * @param inputString the string to clean; may be null or empty
 * @param newlineReplacementCharacter the character written in place of each '\r' and '\n'
 * @return the input itself when it is null or empty; otherwise a string of the same length
 *         in which every '\r' and '\n' has been replaced by {@code newlineReplacementCharacter}
 */
public static String removeNewlinesFromString(String inputString, char newlineReplacementCharacter) {

    if ((inputString == null) || inputString.isEmpty()) {
        return inputString;
    }

    // Single pass over the characters: swap each newline character for the replacement.
    char[] characters = inputString.toCharArray();
    boolean foundNewline = false;

    for (int i = 0; i < characters.length; i++) {
        char current = characters[i];

        if ((current == '\n') || (current == '\r')) {
            characters[i] = newlineReplacementCharacter;
            foundNewline = true;
        }
    }

    // No newline characters present -- return the original string as-is.
    return foundNewline ? new String(characters) : inputString;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import com.pearson.statsagg.utilities.DateAndTime;
import com.pearson.statsagg.utilities.KeyValue;
import com.pearson.statsagg.utilities.StackTrace;
import com.pearson.statsagg.utilities.StringUtilities;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
Expand Down Expand Up @@ -168,7 +169,7 @@ private boolean forgetMetricAndReloadPage(String metricToForget, String alertNam

String trimmedMetricToForget = metricToForget.trim();
GlobalVariables.immediateCleanupMetrics.put(trimmedMetricToForget, trimmedMetricToForget);
String cleanMetricKey = StatsAggHtmlFramework.removeNewlinesFromString(trimmedMetricToForget);
String cleanMetricKey = StringUtilities.removeNewlinesFromString(trimmedMetricToForget);
logger.info("Action=AlertAssociations_ForgetMetric, " + "MetricKey=\"" + cleanMetricKey + "\"");

if (GlobalVariables.cleanupInvokerThread != null) GlobalVariables.cleanupInvokerThread.runCleanupThread();
Expand Down
Loading

0 comments on commit dcb2ebe

Please sign in to comment.