Use of com.linkedin.kafka.cruisecontrol.detector.TopicAnomaly in project cruise-control by LinkedIn.
Class AlertaSelfHealingNotifier, method alert.
/**
 * Notifies Alerta.io about a detected anomaly, after delegating to the parent notifier.
 *
 * <p>Nothing is sent to Alerta.io (a warning is logged instead) when either the Alerta API URL or the
 * Alerta API key is {@code null}. Otherwise one or more {@link AlertaMessage}s are built by the
 * anomaly-type-specific helper, decorated with the common fields (environment, service, text, origin,
 * type, raw data and tags) and sent one by one. A failure to send one message is logged and does not
 * prevent the remaining messages from being sent.
 *
 * @param anomaly the detected anomaly; it is cast to the concrete class matching {@code anomalyType}.
 * @param autoFixTriggered {@code true} if self-healing has been triggered for this anomaly.
 * @param selfHealingStartTime the self-healing start time, rendered via {@code utcDateFor} when
 *                             self-healing is enabled for {@code anomalyType}.
 * @param anomalyType the type of the anomaly; it is cast to {@link KafkaAnomalyType}.
 * @throws IllegalStateException if {@code anomalyType} is not one of the handled {@link KafkaAnomalyType} values.
 */
@Override
public void alert(Anomaly anomaly, boolean autoFixTriggered, long selfHealingStartTime, AnomalyType anomalyType) {
  super.alert(anomaly, autoFixTriggered, selfHealingStartTime, anomalyType);

  if (_alertaApiUrl == null) {
    LOG.warn("Alerta API URL is null, can't send Alerta.io self healing notification");
    return;
  }
  if (_alertaApiKey == null) {
    LOG.warn("Alerta API key is null, can't send Alerta.io self healing notification");
    return;
  }

  // Format specifiers are only expanded inside the format string itself; a "%n" passed as an argument
  // would be emitted literally. The line break is therefore built explicitly.
  String selfHealingTriggeredNote = autoFixTriggered
      ? System.lineSeparator() + "Self-healing has been triggered."
      : "";
  String text = String.format("%s detected %s. Self healing %s.%s", anomalyType, anomaly,
      _selfHealingEnabled.get(anomalyType) ? String.format("start time %s", utcDateFor(selfHealingStartTime)) : "is disabled",
      selfHealingTriggeredNote);

  // Fall back to a fixed name when the local hostname cannot be resolved.
  String tmpLocalHostname;
  try {
    tmpLocalHostname = InetAddress.getLocalHost().getCanonicalHostName();
  } catch (UnknownHostException e) {
    LOG.warn("Unable to get the hostname of the Cruise Control server", e);
    tmpLocalHostname = ALERT_CRUISE_CONTROL;
  }
  final String localHostname = tmpLocalHostname;

  // Each helper appends the anomaly-specific messages to this list.
  List<AlertaMessage> alertaMessages = new ArrayList<>();
  switch ((KafkaAnomalyType) anomalyType) {
    case GOAL_VIOLATION:
      GoalViolations goalViolations = (GoalViolations) anomaly;
      alertGoalViolation(anomalyType, localHostname, alertaMessages, goalViolations);
      break;
    case BROKER_FAILURE:
      BrokerFailures brokerFailures = (BrokerFailures) anomaly;
      alertBrokerFailure(anomalyType, localHostname, alertaMessages, brokerFailures);
      break;
    case METRIC_ANOMALY:
      KafkaMetricAnomaly metricAnomaly = (KafkaMetricAnomaly) anomaly;
      alertMetricAnomaly(anomalyType, localHostname, alertaMessages, metricAnomaly);
      break;
    case DISK_FAILURE:
      DiskFailures diskFailures = (DiskFailures) anomaly;
      alertDiskFailure(anomalyType, localHostname, alertaMessages, diskFailures);
      break;
    case TOPIC_ANOMALY:
      TopicAnomaly topicAnomaly = (TopicAnomaly) anomaly;
      alertTopicAnomaly(anomalyType, localHostname, alertaMessages, topicAnomaly);
      break;
    case MAINTENANCE_EVENT:
      MaintenanceEvent maintenanceEvent = (MaintenanceEvent) anomaly;
      alertMaintenanceEvent(anomalyType, localHostname, alertaMessages, maintenanceEvent);
      break;
    default:
      throw new IllegalStateException("Unrecognized anomaly type.");
  }

  // Fill in the fields shared by every message and send them individually.
  for (AlertaMessage alertaMessage : alertaMessages) {
    alertaMessage.setEnvironment(_alertaEnvironment);
    alertaMessage.setService(Collections.singletonList(ALERT_CRUISE_CONTROL));
    alertaMessage.setText(text);
    alertaMessage.setOrigin(ALERT_CRUISE_CONTROL + "/" + localHostname);
    alertaMessage.setType(ALERT_CRUISE_CONTROL_ALARM);
    alertaMessage.setRawData(anomaly.toString());
    alertaMessage.setTags(Collections.singletonList(ALERT_ALARM_ID_TAG_KEY + ":" + anomaly.anomalyId()));
    try {
      sendAlertaMessage(alertaMessage);
    } catch (IOException e) {
      // Best effort: keep going so that one failed delivery does not drop the remaining alerts.
      LOG.warn("ERROR sending alert to Alerta.io", e);
    }
  }
}
Use of com.linkedin.kafka.cruisecontrol.detector.TopicAnomaly in project cruise-control by LinkedIn.
Class AlertaSelfHealingNotifier, method alertTopicAnomaly.
/**
 * Appends the Alerta messages describing a topic anomaly to {@code alertaMessages}.
 *
 * <ul>
 *   <li>{@link TopicPartitionSizeAnomaly}: one message per partition, valued with its size in MB.</li>
 *   <li>{@link TopicReplicationFactorAnomaly}: one message per bad topic, valued with its violation ratio.</li>
 *   <li>Any other topic anomaly: a single message named after the anomaly type, with no value set.</li>
 * </ul>
 *
 * Every message gets the severity derived from {@code anomalyType}, the {@code PERFORMANCE} group and a
 * create time computed from the anomaly's detection time.
 *
 * @param anomalyType the anomaly type, used for the severity and for the generic message name.
 * @param localHostname the hostname passed to each created {@link AlertaMessage}.
 * @param alertaMessages the list the created messages are added to.
 * @param topicAnomaly the topic anomaly to describe.
 */
private void alertTopicAnomaly(AnomalyType anomalyType, final String localHostname, List<AlertaMessage> alertaMessages, TopicAnomaly topicAnomaly) {
  if (topicAnomaly instanceof TopicPartitionSizeAnomaly) {
    // One alert per oversized partition.
    ((TopicPartitionSizeAnomaly) topicAnomaly).sizeInMbByPartition().forEach((partition, sizeInMb) -> {
      AlertaMessage message =
          new AlertaMessage(localHostname, ALERT_MESSAGE_PREFIX_TOPIC_PARTITION_SIZE_ANOMALY + partition.toString());
      message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
      message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
      message.setValue(String.format("%f MB", sizeInMb));
      message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
      alertaMessages.add(message);
    });
    return;
  }

  if (topicAnomaly instanceof TopicReplicationFactorAnomaly) {
    // One alert per topic violating its desired replication factor; the desired RF key itself is not reported.
    TopicReplicationFactorAnomaly rfAnomaly = (TopicReplicationFactorAnomaly) topicAnomaly;
    for (Set<TopicReplicationFactorAnomalyEntry> badTopics : rfAnomaly.badTopicsByDesiredRF().values()) {
      for (TopicReplicationFactorAnomalyEntry badTopic : badTopics) {
        AlertaMessage message =
            new AlertaMessage(localHostname, ALERT_MESSAGE_PREFIX_TOPIC_REPLICATION_FACTOR_ANOMALY + badTopic.topicName());
        message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
        message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
        message.setValue(String.format("%.2f", badTopic.violationRatio()));
        message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
        alertaMessages.add(message);
      }
    }
    return;
  }

  // Unknown topic anomaly subtype: emit a single generic alert without a value.
  AlertaMessage genericMessage = new AlertaMessage(localHostname, anomalyType.toString());
  genericMessage.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
  genericMessage.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
  genericMessage.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
  alertaMessages.add(genericMessage);
}
Aggregations