Search in sources :

Example 1 with TopicAnomaly

Use of com.linkedin.kafka.cruisecontrol.detector.TopicAnomaly in the cruise-control project by LinkedIn.

Shown below: the method alert of the class AlertaSelfHealingNotifier.

/**
 * Forwards the anomaly to the superclass notifier and then reports it to Alerta.io.
 *
 * <p>If either the Alerta API URL or the Alerta API key is {@code null}, a warning is logged and
 * nothing is sent. Otherwise a type-specific helper fills a list of {@link AlertaMessage}s, each
 * message is completed with the common fields (environment, service, text, origin, type, raw data,
 * tags) and sent. A failure to send one message is logged and does not prevent the remaining
 * messages from being sent.
 *
 * @param anomaly the detected anomaly; cast to the concrete class matching {@code anomalyType}
 * @param autoFixTriggered whether self-healing has been triggered for this anomaly
 * @param selfHealingStartTime self-healing start time, rendered via {@code utcDateFor}
 * @param anomalyType the anomaly type; cast to {@code KafkaAnomalyType} to select the helper
 * @throws IllegalStateException if {@code anomalyType} is not one of the handled types
 */
@Override
public void alert(Anomaly anomaly, boolean autoFixTriggered, long selfHealingStartTime, AnomalyType anomalyType) {
    super.alert(anomaly, autoFixTriggered, selfHealingStartTime, anomalyType);
    if (_alertaApiUrl == null) {
        LOG.warn("Alerta API URL is null, can't send Alerta.io self healing notification");
        return;
    }
    if (_alertaApiKey == null) {
        LOG.warn("Alerta API key is null, can't send Alerta.io self healing notification");
        return;
    }
    String selfHealingStatus = _selfHealingEnabled.get(anomalyType)
        ? String.format("start time %s", utcDateFor(selfHealingStartTime))
        : "is disabled";
    // Format specifiers are only interpreted in the format string, never in its arguments, so a
    // "%n" passed as an argument would show up literally in the alert text. Build the line break
    // explicitly instead.
    String autoFixNote = autoFixTriggered
        ? System.lineSeparator() + "Self-healing has been triggered."
        : "";
    String text = String.format("%s detected %s. Self healing %s.%s", anomalyType, anomaly, selfHealingStatus, autoFixNote);
    String tmpLocalHostname;
    try {
        tmpLocalHostname = InetAddress.getLocalHost().getCanonicalHostName();
    } catch (UnknownHostException e) {
        LOG.warn("Unable to get the hostname of the Cruise Control server", e);
        // Fall back to the generic Cruise Control identifier when the hostname cannot be resolved.
        tmpLocalHostname = ALERT_CRUISE_CONTROL;
    }
    final String localHostname = tmpLocalHostname;
    List<AlertaMessage> alertaMessages = new ArrayList<>();
    // Each helper receives the (initially empty) message list to populate for its anomaly class.
    switch ((KafkaAnomalyType) anomalyType) {
        case GOAL_VIOLATION:
            GoalViolations goalViolations = (GoalViolations) anomaly;
            alertGoalViolation(anomalyType, localHostname, alertaMessages, goalViolations);
            break;
        case BROKER_FAILURE:
            BrokerFailures brokerFailures = (BrokerFailures) anomaly;
            alertBrokerFailure(anomalyType, localHostname, alertaMessages, brokerFailures);
            break;
        case METRIC_ANOMALY:
            KafkaMetricAnomaly metricAnomaly = (KafkaMetricAnomaly) anomaly;
            alertMetricAnomaly(anomalyType, localHostname, alertaMessages, metricAnomaly);
            break;
        case DISK_FAILURE:
            DiskFailures diskFailures = (DiskFailures) anomaly;
            alertDiskFailure(anomalyType, localHostname, alertaMessages, diskFailures);
            break;
        case TOPIC_ANOMALY:
            TopicAnomaly topicAnomaly = (TopicAnomaly) anomaly;
            alertTopicAnomaly(anomalyType, localHostname, alertaMessages, topicAnomaly);
            break;
        case MAINTENANCE_EVENT:
            MaintenanceEvent maintenanceEvent = (MaintenanceEvent) anomaly;
            alertMaintenanceEvent(anomalyType, localHostname, alertaMessages, maintenanceEvent);
            break;
        default:
            throw new IllegalStateException("Unrecognized anomaly type.");
    }
    // Stamp the fields shared by every message of this anomaly, then send each one independently.
    for (AlertaMessage alertaMessage : alertaMessages) {
        alertaMessage.setEnvironment(_alertaEnvironment);
        alertaMessage.setService(Collections.singletonList(ALERT_CRUISE_CONTROL));
        alertaMessage.setText(text);
        alertaMessage.setOrigin(ALERT_CRUISE_CONTROL + "/" + localHostname);
        alertaMessage.setType(ALERT_CRUISE_CONTROL_ALARM);
        alertaMessage.setRawData(anomaly.toString());
        alertaMessage.setTags(Collections.singletonList(ALERT_ALARM_ID_TAG_KEY + ":" + anomaly.anomalyId()));
        try {
            sendAlertaMessage(alertaMessage);
        } catch (IOException e) {
            // Best effort: log and continue with the remaining messages.
            LOG.warn("ERROR sending alert to Alerta.io", e);
        }
    }
}
Also used : TopicAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicAnomaly) MaintenanceEvent(com.linkedin.kafka.cruisecontrol.detector.MaintenanceEvent) UnknownHostException(java.net.UnknownHostException) KafkaMetricAnomaly(com.linkedin.kafka.cruisecontrol.detector.KafkaMetricAnomaly) ArrayList(java.util.ArrayList) IOException(java.io.IOException) GoalViolations(com.linkedin.kafka.cruisecontrol.detector.GoalViolations) BrokerFailures(com.linkedin.kafka.cruisecontrol.detector.BrokerFailures) DiskFailures(com.linkedin.kafka.cruisecontrol.detector.DiskFailures)

Example 2 with TopicAnomaly

Use of com.linkedin.kafka.cruisecontrol.detector.TopicAnomaly in the cruise-control project by LinkedIn.

Shown below: the method alertTopicAnomaly of the class AlertaSelfHealingNotifier.

/**
 * Converts a topic anomaly into Alerta messages and appends them to {@code alertaMessages}.
 *
 * <ul>
 *   <li>{@code TopicPartitionSizeAnomaly}: one message per entry of {@code sizeInMbByPartition()},
 *       with the entry's value formatted as {@code "%f MB"}.</li>
 *   <li>{@code TopicReplicationFactorAnomaly}: one message per entry contained in
 *       {@code badTopicsByDesiredRF()}, with the violation ratio formatted as {@code "%.2f"}.</li>
 *   <li>Any other topic anomaly: a single message named after the anomaly type, without a value.</li>
 * </ul>
 *
 * Every message gets the severity derived from {@code anomalyType}, the PERFORMANCE group and a
 * create time computed from the anomaly's detection time.
 *
 * @param anomalyType type used for the severity lookup and for naming the generic message
 * @param localHostname first constructor argument of every created message
 * @param alertaMessages output list the created messages are appended to
 * @param topicAnomaly the anomaly to report
 */
private void alertTopicAnomaly(AnomalyType anomalyType, final String localHostname, List<AlertaMessage> alertaMessages, TopicAnomaly topicAnomaly) {
    if (topicAnomaly instanceof TopicPartitionSizeAnomaly) {
        TopicPartitionSizeAnomaly sizeAnomaly = (TopicPartitionSizeAnomaly) topicAnomaly;
        for (Map.Entry<TopicPartition, Double> sizeByPartition : sizeAnomaly.sizeInMbByPartition().entrySet()) {
            String event = ALERT_MESSAGE_PREFIX_TOPIC_PARTITION_SIZE_ANOMALY + sizeByPartition.getKey().toString();
            AlertaMessage message = new AlertaMessage(localHostname, event);
            message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
            message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
            message.setValue(String.format("%f MB", sizeByPartition.getValue()));
            message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
            alertaMessages.add(message);
        }
        return;
    }

    if (topicAnomaly instanceof TopicReplicationFactorAnomaly) {
        TopicReplicationFactorAnomaly rfAnomaly = (TopicReplicationFactorAnomaly) topicAnomaly;
        // The desired-RF keys are not part of the message, so only the grouped entries are walked.
        for (Set<TopicReplicationFactorAnomalyEntry> badTopics : rfAnomaly.badTopicsByDesiredRF().values()) {
            for (TopicReplicationFactorAnomalyEntry badTopic : badTopics) {
                String event = ALERT_MESSAGE_PREFIX_TOPIC_REPLICATION_FACTOR_ANOMALY + badTopic.topicName();
                AlertaMessage message = new AlertaMessage(localHostname, event);
                message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
                message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
                message.setValue(String.format("%.2f", badTopic.violationRatio()));
                message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
                alertaMessages.add(message);
            }
        }
        return;
    }

    // Generic topic anomaly: a single message identified by the anomaly type, no value attached.
    AlertaMessage message = new AlertaMessage(localHostname, anomalyType.toString());
    message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
    message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
    message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
    alertaMessages.add(message);
}
Also used : GoalViolations(com.linkedin.kafka.cruisecontrol.detector.GoalViolations) BrokerEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity) BrokerFailures(com.linkedin.kafka.cruisecontrol.detector.BrokerFailures) LoggerFactory(org.slf4j.LoggerFactory) KafkaMetricAnomaly(com.linkedin.kafka.cruisecontrol.detector.KafkaMetricAnomaly) TopicAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicAnomaly) ArrayList(java.util.ArrayList) InetAddress(java.net.InetAddress) Anomaly(com.linkedin.cruisecontrol.detector.Anomaly) TopicReplicationFactorAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly) DiskFailures(com.linkedin.kafka.cruisecontrol.detector.DiskFailures) Map(java.util.Map) MaintenanceEvent(com.linkedin.kafka.cruisecontrol.detector.MaintenanceEvent) TopicPartition(org.apache.kafka.common.TopicPartition) CruiseControlUtils.utcDateFor(com.linkedin.cruisecontrol.CruiseControlUtils.utcDateFor) Logger(org.slf4j.Logger) CruiseControlUtils(com.linkedin.cruisecontrol.CruiseControlUtils) Time(org.apache.kafka.common.utils.Time) AnomalyType(com.linkedin.cruisecontrol.detector.AnomalyType) Set(java.util.Set) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) List(java.util.List) ChronoUnit(java.time.temporal.ChronoUnit) TopicReplicationFactorAnomalyEntry(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly.TopicReplicationFactorAnomalyEntry) Entry(java.util.Map.Entry) TopicPartitionSizeAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicPartitionSizeAnomaly) Collections(java.util.Collections) TopicReplicationFactorAnomalyEntry(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly.TopicReplicationFactorAnomalyEntry) Entry(java.util.Map.Entry) TopicPartition(org.apache.kafka.common.TopicPartition) TopicPartitionSizeAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicPartitionSizeAnomaly) Map(java.util.Map) 
TopicReplicationFactorAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly) TopicReplicationFactorAnomalyEntry(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly.TopicReplicationFactorAnomalyEntry)

Aggregations

BrokerFailures (com.linkedin.kafka.cruisecontrol.detector.BrokerFailures)2 DiskFailures (com.linkedin.kafka.cruisecontrol.detector.DiskFailures)2 GoalViolations (com.linkedin.kafka.cruisecontrol.detector.GoalViolations)2 KafkaMetricAnomaly (com.linkedin.kafka.cruisecontrol.detector.KafkaMetricAnomaly)2 MaintenanceEvent (com.linkedin.kafka.cruisecontrol.detector.MaintenanceEvent)2 TopicAnomaly (com.linkedin.kafka.cruisecontrol.detector.TopicAnomaly)2 IOException (java.io.IOException)2 UnknownHostException (java.net.UnknownHostException)2 ArrayList (java.util.ArrayList)2 CruiseControlUtils (com.linkedin.cruisecontrol.CruiseControlUtils)1 CruiseControlUtils.utcDateFor (com.linkedin.cruisecontrol.CruiseControlUtils.utcDateFor)1 Anomaly (com.linkedin.cruisecontrol.detector.Anomaly)1 AnomalyType (com.linkedin.cruisecontrol.detector.AnomalyType)1 TopicPartitionSizeAnomaly (com.linkedin.kafka.cruisecontrol.detector.TopicPartitionSizeAnomaly)1 TopicReplicationFactorAnomaly (com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly)1 TopicReplicationFactorAnomalyEntry (com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly.TopicReplicationFactorAnomalyEntry)1 BrokerEntity (com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity)1 InetAddress (java.net.InetAddress)1 ChronoUnit (java.time.temporal.ChronoUnit)1 Collections (java.util.Collections)1