Skip to content

Commit

Permalink
refactor(alarm): optimize alertNotifyRecord (#765)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jsy1001de authored Dec 21, 2023
1 parent 5b6424d commit 87db137
Show file tree
Hide file tree
Showing 21 changed files with 289 additions and 251 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,6 @@
*/
package io.holoinsight.server.home.alert.model.compute;

import io.holoinsight.server.home.dal.model.AlertNotifyRecord;
import io.holoinsight.server.home.facade.AlertNotifyRecordDTO;
import io.holoinsight.server.home.facade.InspectConfig;
import lombok.Data;
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -145,7 +145,7 @@ public static AlertNotify eventInfoConvert(EventInfo eventInfo, InspectConfig in
alertNotify.setAlertIp(AddressUtil.getHostAddress());
}
} catch (Exception e) {
RecordSucOrFailNotify.alertNotifyProcess("event convert alert notify exception" + e,
RecordSucOrFailNotify.alertNotifyProcessFail("event convert alert notify exception" + e,
"alert task compute", "event convert alert notify", alertNotify.getAlertNotifyRecord());
throw e;
}
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,8 +68,8 @@ public void handle(List<AlertNotify> alertNotifies) {
LOGGER.error(
"[HoloinsightAlertInternalException][AlertNotifyHandler][1] {} fail to alert_notify_handle for {}",
alertNotify.getTraceId(), e.getMessage(), e);
RecordSucOrFailNotify.alertNotifyProcess(
"fail to alert_notify_handle for " + e.getMessage(), NOTIFY_HANDLER,
RecordSucOrFailNotify.alertNotifyProcessFail(
"fail to alert notify handle for " + e.getMessage(), NOTIFY_HANDLER,
"alert notify handle", alertNotify.getAlertNotifyRecord());
} finally {
if (latch != null && alertNotify.isAlertRecord()) {
Expand All @@ -90,7 +90,7 @@ public void handle(List<AlertNotify> alertNotifies) {
}
if (status) {
LOGGER.info("alert notify record data size {} .", recordLatch.size());
RecordSucOrFailNotify.batchInsert(recordLatch.getAlertNotifyRecordDTOList());
// RecordSucOrFailNotify.batchInsert(recordLatch.getAlertNotifyRecordDTOList());
}

LOGGER.info("alert_notification_notify_step size [{}]", count);
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -390,8 +390,8 @@ private void makeAlertHistory(Map<String, AlarmHistory> alertHistoryMap,
RecordSucOrFailNotify.alertNotifyProcessSuc(SAVE_HISTORY, "save history",
alertNotify.getAlertNotifyRecord());
} catch (Exception e) {
RecordSucOrFailNotify.alertNotifyProcess(
alertNotify.getTraceId() + "fail to alert_history_save for" + e.getMessage(),
RecordSucOrFailNotify.alertNotifyProcessFail(
alertNotify.getTraceId() + "fail to alert history save for" + e.getMessage(),
SAVE_HISTORY, "save history", alertNotify.getAlertNotifyRecord());
LOGGER.error(
"[HoloinsightAlertInternalException][AlertSaveHistoryHandler][1] {} fail to alert_history_save for {}",
Expand Down Expand Up @@ -569,12 +569,12 @@ private void makeAlertRecover(Map<String, AlarmHistory> alertHistoryDOMap,
AlarmHistory alertHistory = alertHistoryDOMap.get(alertNotify.getUniqueId());
alertHistory.setRecoverTime(new Date(alertNotify.getAlarmTime()));
alarmHistoryDOMapper.updateById(alertHistory);
RecordSucOrFailNotify.alertNotifyProcessSuc(SAVE_HISTORY, "save history is recover",
RecordSucOrFailNotify.alertNotifyProcessSuc(SAVE_HISTORY, "alert recover",
alertNotify.getAlertNotifyRecord());
} catch (Exception e) {
RecordSucOrFailNotify.alertNotifyProcess(
alertNotify.getTraceId() + "fail to alert_recover_update for" + e.getMessage(),
SAVE_HISTORY, "save history is recover", alertNotify.getAlertNotifyRecord());
RecordSucOrFailNotify.alertNotifyProcessFail(
alertNotify.getTraceId() + "fail to alert recover for" + e.getMessage(), SAVE_HISTORY,
"alert recover", alertNotify.getAlertNotifyRecord());
LOGGER.error(
"[HoloinsightAlertInternalException][AlertSaveHistoryHandler][1] {} fail to alert_recover_update for {}",
alertNotify.getTraceId(), e.getMessage(), e);
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -73,16 +73,17 @@ public boolean sendAlertNotifyV3(AlertNotifyRequest notify, AlertNotifyRecordLat
String type = defaultNotifyChain.name;
if (CollectionUtils.isEmpty(notify.getNotifyDataInfos())) {
LOGGER.info("{} notify data info is empty.", traceId);
RecordSucOrFailNotify.alertNotifyProcess(traceId + ": notify data info is empty ", GATEWAY,
"check notify data info", notify.getAlertNotifyRecord());
RecordSucOrFailNotify.alertNotifyProcessFail(traceId + ": notify data info is empty ",
GATEWAY, "check notify data info", notify.getAlertNotifyRecord());
return true;
}

String ruleId = notify.getRuleId();
if (!StringUtils.isNumeric(ruleId)) {
LOGGER.warn("{} invalid rule {}", traceId, ruleId);
RecordSucOrFailNotify.alertNotifyProcess(traceId + ": invalid rule fail; ruleId is " + ruleId,
GATEWAY, "invalid rule", notify.getAlertNotifyRecord());
RecordSucOrFailNotify.alertNotifyProcessFail(
traceId + ": invalid rule fail; ruleId is " + ruleId, GATEWAY, "invalid rule",
notify.getAlertNotifyRecord());
return true;
}

Expand All @@ -99,7 +100,7 @@ public boolean sendAlertNotifyV3(AlertNotifyRequest notify, AlertNotifyRecordLat

if (CollectionUtils.isEmpty(notifyChainList)) {
LOGGER.info("{} {} notifyChainList is empty, skip.", traceId, ruleId);
RecordSucOrFailNotify.alertNotifyProcess(traceId + ": notifyChainList is empty, skip ",
RecordSucOrFailNotify.alertNotifyProcessFail(traceId + ": notifyChainList is empty, skip ",
GATEWAY, " get notify chainList", notify.getAlertNotifyRecord());
return true;
}
Expand All @@ -108,8 +109,9 @@ public boolean sendAlertNotifyV3(AlertNotifyRequest notify, AlertNotifyRecordLat
AlarmRuleDTO alertRule = this.alarmRuleConverter.doToDTO(rawRule);
if (alertRule == null) {
LOGGER.warn("{} can not find alarmRule by {}", traceId, ruleId);
RecordSucOrFailNotify.alertNotifyProcess(traceId + ": can not find alarmRule by " + ruleId,
GATEWAY, "find alarmRule", notify.getAlertNotifyRecord());
RecordSucOrFailNotify.alertNotifyProcessFail(
traceId + ": can not find alarmRule by " + ruleId, GATEWAY, "find alarmRule",
notify.getAlertNotifyRecord());
return true;
}

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -89,6 +89,8 @@ public void handle(List<AlertNotify> alertNotifies) {

// 查询消息通知订阅关系
alertNotifies.parallelStream().forEach(alertNotify -> {
RecordSucOrFailNotify.alertNotifyProcessSuc(GET_SUBSCRIPTION, "query subscriber",
alertNotify.getAlertNotifyRecord());
try {
QueryWrapper<AlarmSubscribe> alertSubscribeQueryWrapper = new QueryWrapper<>();
alertSubscribeQueryWrapper.eq("unique_id", alertNotify.getUniqueId());
Expand All @@ -109,7 +111,6 @@ public void handle(List<AlertNotify> alertNotifies) {
}

if (!CollectionUtils.isEmpty(alertSubscribeList)) {
Set<String> userIdList = new HashSet<>();
Set<Long> dingDingGroupIdList = new HashSet<>();
List<WebhookInfo> webhookInfos = new ArrayList<>();
Map<String/* notify type */, List<String>> userNotifyMap = new HashMap<>();
Expand Down Expand Up @@ -205,14 +206,20 @@ public void handle(List<AlertNotify> alertNotifies) {
alertNotify.setDingdingUrl(dingdingUrls);
}
}
RecordSucOrFailNotify.alertNotifyProcessSuc(GET_SUBSCRIPTION, "query subscription",
alertNotify.getAlertNotifyRecord());

if (CollectionUtils.isEmpty(alertNotify.getUserNotifyMap())
&& CollectionUtils.isEmpty(alertNotify.getWebhookInfos())
&& CollectionUtils.isEmpty(alertNotify.getDingdingUrl())) {
RecordSucOrFailNotify.alertNotifyProcessFail("query subscriber is empty",
GET_SUBSCRIPTION, "query subscriber error", alertNotify.getAlertNotifyRecord());
}
} catch (Throwable e) {
LOGGER.error(
"[HoloinsightAlertInternalException][GetSubscriptionHandler][1] {} fail to get_subscription for {}",
alertNotify.getTraceId(), e.getMessage(), e);
RecordSucOrFailNotify.alertNotifyProcess("fail to get_subscription for " + e.getMessage(),
GET_SUBSCRIPTION, "query subscription", alertNotify.getAlertNotifyRecord());
RecordSucOrFailNotify.alertNotifyProcessFail(
"query subscriber is error, " + e.getMessage(), GET_SUBSCRIPTION,
"query subscriber error", alertNotify.getAlertNotifyRecord());
}
});
LOGGER.info("[GetSubscriptionHandler][{}] finish to get_subscription.", alertNotifies.size());
Expand All @@ -239,8 +246,6 @@ private void handleBlock(List<AlertNotify> alertNotifies) {
AlertNotify alertNotify = iterator.next();

if (alertNotify.getIsRecover()) {
RecordSucOrFailNotify.alertNotifyProcess("alertNotify is recover", GET_SUBSCRIPTION,
"remove block subscription", alertNotify.getAlertNotifyRecord());
iterator.remove();
LOGGER.info("{} alert rule {} has recovered.", alertNotify.getTraceId(),
alertNotify.getUniqueId());
Expand All @@ -253,8 +258,6 @@ private void handleBlock(List<AlertNotify> alertNotifies) {
}

if (StringUtils.isEmpty(alertBlock.getTags()) || alertBlock.getTags().equals("{}")) {
RecordSucOrFailNotify.alertNotifyProcess("alertNotify is blocked", GET_SUBSCRIPTION,
"remove block subscription", alertNotify.getAlertNotifyRecord());
iterator.remove();
LOGGER.info("{} alert rule {} has been blocked.", alertNotify.getTraceId(),
alertNotify.getUniqueId());
Expand All @@ -278,6 +281,8 @@ private void handleBlock(List<AlertNotify> alertNotifies) {
if (matcher.find()) {
LOGGER.info("{} pql alert rule {} tag {} {} has been blocked.",
alertNotify.getTraceId(), alertNotify.getUniqueId(), key, value);
RecordSucOrFailNotify.alertNotifyProcessSuc(GET_SUBSCRIPTION, "alarm block",
alertNotify.getAlertNotifyRecord());
it.remove();
}
}
Expand All @@ -286,6 +291,8 @@ private void handleBlock(List<AlertNotify> alertNotifies) {
if (CollectionUtils.isEmpty(pqlRule.getDataResult())) {
LOGGER.info("{} pql alert rule {} has been blocked because all tags have been blocked.",
alertNotify.getTraceId(), alertNotify.getUniqueId());
RecordSucOrFailNotify.alertNotifyProcessSuc(GET_SUBSCRIPTION, "alarm block",
alertNotify.getAlertNotifyRecord());
iterator.remove();
}
} else {
Expand All @@ -304,6 +311,8 @@ private void handleBlock(List<AlertNotify> alertNotifies) {
LOGGER.info("{} alert rule {} tag {} {} has been blocked.",
alertNotify.getTraceId(), alertNotify.getUniqueId(), key, value);
it.remove();
RecordSucOrFailNotify.alertNotifyProcessSuc(GET_SUBSCRIPTION, "alarm block",
alertNotify.getAlertNotifyRecord());
}
}
});
Expand All @@ -315,15 +324,14 @@ private void handleBlock(List<AlertNotify> alertNotifies) {
if (CollectionUtils.isEmpty(notifyDataInfos)) {
LOGGER.info("{} alert rule {} has been blocked because all tags have been blocked.",
alertNotify.getTraceId(), alertNotify.getUniqueId());
RecordSucOrFailNotify.alertNotifyProcessSuc(GET_SUBSCRIPTION, "alarm block",
alertNotify.getAlertNotifyRecord());
iterator.remove();
} else {
alertNotify.setNotifyDataInfos(notifyDataInfos);
}
}
}

RecordSucOrFailNotify.alertNotifyProcessSuc(GET_SUBSCRIPTION, "remove block subscription",
alertNotify.getAlertNotifyRecord());
}
}

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,6 @@
*/
package io.holoinsight.server.home.alert.service.calculate;

import io.holoinsight.server.home.alert.common.G;
import io.holoinsight.server.home.alert.model.compute.ComputeContext;
import io.holoinsight.server.home.alert.model.compute.ComputeInfo;
import io.holoinsight.server.home.alert.model.event.EventInfo;
Expand Down Expand Up @@ -72,7 +71,7 @@ public EventInfo eval(ComputeContext context,
events = runRule(inspectConfig, period, alertNotifyRecordDTOList);
}
} catch (Throwable ex) {
RecordSucOrFailNotify.alertNotifyProcess("AlertTaskCompute Exception: " + ex,
RecordSucOrFailNotify.alertNotifyProcessFail("AlertTaskCompute Exception: " + ex,
ALERT_TASK_COMPUTE, "alert task compute", inspectConfig.getAlertNotifyRecord());
logger.error("ALERT_EXCEPTION_MONITOR inspectConfig {}, traceId: {} ",
inspectConfig.getUniqueId(), traceId, ex);
Expand All @@ -82,6 +81,19 @@ public EventInfo eval(ComputeContext context,

public EventInfo runPqlRule(InspectConfig inspectConfig, long period,
List<AlertNotifyRecordDTO> alertNotifyRecordDTOList) {

if (!inspectConfig.getIsPql()
|| CollectionUtils.isEmpty(inspectConfig.getPqlRule().getDataResult())) {
// record pql rule alert
AlertNotifyRecordDTO alertNotifyRecordDTO = inspectConfig.getAlertNotifyRecord();
if (Objects.nonNull(alertNotifyRecordDTO)) {
RecordSucOrFailNotify.alertNotifyNoEventGenerated("pql rule is empty", ALERT_TASK_COMPUTE,
"run pql rule", alertNotifyRecordDTO, null);
alertNotifyRecordDTOList.add(alertNotifyRecordDTO);
}
return null;
}

if (inspectConfig.getIsPql()
&& !CollectionUtils.isEmpty(inspectConfig.getPqlRule().getDataResult())) {
EventInfo eventInfo = new EventInfo();
Expand All @@ -94,15 +106,13 @@ public EventInfo runPqlRule(InspectConfig inspectConfig, long period,
Map<Trigger, List<TriggerResult>> triggerMap =
convertFromPql(inspectConfig.getPqlRule(), period, inspectConfig);
eventInfo.setAlarmTriggerResults(triggerMap);

RecordSucOrFailNotify.alertNotifyProcessSuc(ALERT_TASK_COMPUTE, "run pql rule",
inspectConfig.getAlertNotifyRecord());

return eventInfo;
}
// record pql rule alert
AlertNotifyRecordDTO alertNotifyRecordDTO = inspectConfig.getAlertNotifyRecord();
if (Objects.nonNull(alertNotifyRecordDTO)) {
RecordSucOrFailNotify.alertNotifyNoEventGenerated("pql rule is empty", ALERT_TASK_COMPUTE,
"run pql rule", alertNotifyRecordDTO, null);
alertNotifyRecordDTOList.add(alertNotifyRecordDTO);
}

// 恢复时这里返回null
return null;
}
Expand Down Expand Up @@ -190,6 +200,9 @@ public EventInfo runRule(InspectConfig inspectConfig, long period,
eventInfo.setAlarmTime(period);
eventInfo.setIsRecover(false);
eventInfo.setEnvType(inspectConfig.getEnvType());

RecordSucOrFailNotify.alertNotifyProcessSuc(ALERT_TASK_COMPUTE, "run rule",
inspectConfig.getAlertNotifyRecord());
return eventInfo;
}
// record no alarm event generated data
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -198,9 +198,9 @@ protected List<EventInfo> calculate(ComputeTaskPackage computeTaskPackage) {
inspectConfig.getAlertNotifyRecord());
}
latch.await(30, TimeUnit.SECONDS);
if (!CollectionUtils.isEmpty(alertNotifyRecordDTOList)) {
RecordSucOrFailNotify.batchInsert(alertNotifyRecordDTOList);
}
// if (!CollectionUtils.isEmpty(alertNotifyRecordDTOList)) {
// RecordSucOrFailNotify.batchInsert(alertNotifyRecordDTOList);
// }
} catch (Exception e) {
LOGGER.error("AlertTaskCompute Exception", e);
}
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,6 +10,7 @@
import io.holoinsight.server.home.facade.PqlRule;
import io.holoinsight.server.home.facade.Rule;
import io.holoinsight.server.home.facade.trigger.Trigger;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -24,6 +25,7 @@
* @date 2022/2/22 4:51 下午
*/
@Service
@Slf4j
public class AlarmDataSet {

private static Logger LOGGER = LoggerFactory.getLogger(AlarmDataSet.class);
Expand All @@ -46,6 +48,7 @@ public void loadData(ComputeTaskPackage computeTaskPackage) {
}

computeTaskPackage.getInspectConfigs().parallelStream().forEach(inspectConfig -> {

// 处理pql告警逻辑
if (inspectConfig.getIsPql()) {
try {
Expand All @@ -56,21 +59,24 @@ public void loadData(ComputeTaskPackage computeTaskPackage) {
pqlRule.setDataResult(result);
inspectConfig.setPqlRule(pqlRule);
}
RecordSucOrFailNotify.alertNotifyProcessSuc(ALERT_TASK_COMPUTE, "Pql query",

RecordSucOrFailNotify.alertNotifyProcessSuc(ALERT_TASK_COMPUTE, "pql query",
inspectConfig.getAlertNotifyRecord());
} catch (Exception exception) {
RecordSucOrFailNotify.alertNotifyProcess("Pql query Exception: " + exception,
ALERT_TASK_COMPUTE, "Pql query", inspectConfig.getAlertNotifyRecord());
RecordSucOrFailNotify.alertNotifyProcessFail("pql query Exception: " + exception,
ALERT_TASK_COMPUTE, "pql query", inspectConfig.getAlertNotifyRecord());
LOGGER.error("Pql query Exception", exception);
}
}
// 处理current&ai告警逻辑
else {
} else {
// 处理rule&ai告警逻辑
Rule rule = inspectConfig.getRule();
if (rule == null || CollectionUtils.isEmpty(rule.getTriggers())) {
return;
}
boolean notifySuccess = true;
for (Trigger trigger : rule.getTriggers()) {
if (null == trigger || null == trigger.getType())
continue;
try {
// 接入统一数据源,查询数据信息
alarmLoadData = loadDataFactory.getLoadDataService(trigger.getType().getType());
Expand All @@ -79,12 +85,15 @@ public void loadData(ComputeTaskPackage computeTaskPackage) {
trigger.setDataResult(dataResults);
} catch (Exception exception) {
LOGGER.error("AlarmLoadData Exception", exception);
RecordSucOrFailNotify.alertNotifyProcess("alarm load data Exception: " + exception,
RecordSucOrFailNotify.alertNotifyProcessFail("alarm load data Exception: " + exception,
ALERT_TASK_COMPUTE, "alarm load data", inspectConfig.getAlertNotifyRecord());
notifySuccess = false;
}
}
RecordSucOrFailNotify.alertNotifyProcessSuc(ALERT_TASK_COMPUTE, "alarm load data",
inspectConfig.getAlertNotifyRecord());
if (notifySuccess) {
RecordSucOrFailNotify.alertNotifyProcessSuc(ALERT_TASK_COMPUTE, "alarm load data",
inspectConfig.getAlertNotifyRecord());
}
}

});
Expand Down
Loading

0 comments on commit 87db137

Please sign in to comment.