Search in sources :

Example 1 with TopicReplicationFactorAnomaly

Use of com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly in the project cruise-control by linkedin.

The example is the method testSelfHealingDisabled of the class SelfHealingNotifierTest.

/**
 * Verifies that when self-healing is explicitly disabled for every anomaly type, the notifier still
 * raises an alert for each anomaly but answers with {@code IGNORE} and never triggers an auto-fix,
 * even though the general self-healing switch is set to {@code "true"}.
 */
@Test
public void testSelfHealingDisabled() {
    final long initialTimeMs = 500L;
    Time time = new MockTime(initialTimeMs);

    // The mocked Cruise Control instance only has to hand out its config (at least once).
    KafkaCruiseControl cruiseControlMock = EasyMock.mock(KafkaCruiseControl.class);
    Properties ccProperties = KafkaCruiseControlUnitTestUtils.getKafkaCruiseControlProperties();
    KafkaCruiseControlConfig ccConfig = new KafkaCruiseControlConfig(ccProperties);
    EasyMock.expect(cruiseControlMock.config()).andReturn(ccConfig).atLeastOnce();
    EasyMock.replay(cruiseControlMock);

    TestingBrokerFailureAutoFixNotifier notifier = new TestingBrokerFailureAutoFixNotifier(time);
    // Every anomaly-specific switch is "false" while the general switch is "true": this is set to
    // verify the overriding of specific config over general config.
    Map<String, String> disabledSelfHealingConfig = Map.of(
        SelfHealingNotifier.SELF_HEALING_BROKER_FAILURE_ENABLED_CONFIG, "false",
        SelfHealingNotifier.SELF_HEALING_GOAL_VIOLATION_ENABLED_CONFIG, "false",
        SelfHealingNotifier.SELF_HEALING_METRIC_ANOMALY_ENABLED_CONFIG, "false",
        SelfHealingNotifier.SELF_HEALING_DISK_FAILURE_ENABLED_CONFIG, "false",
        SelfHealingNotifier.SELF_HEALING_TOPIC_ANOMALY_ENABLED_CONFIG, "false",
        SelfHealingNotifier.SELF_HEALING_ENABLED_CONFIG, "true");
    notifier.configure(disabledSelfHealingConfig);

    // (1) Broker failure: alert is raised, result is IGNORE, no auto-fix.
    final long firstFailureTimeMs = 200L;
    final long secondFailureTimeMs = 400L;
    Map<Integer, Long> failureTimeByBroker = Map.of(1, firstFailureTimeMs, 2, secondFailureTimeMs);
    final long detectionTimeMs = 200L;
    final BrokerEntity anomalousBroker = new BrokerEntity("local", 1);
    time.sleep(SelfHealingNotifier.DEFAULT_AUTO_FIX_THRESHOLD_MS + firstFailureTimeMs);
    notifier.resetAlert(KafkaAnomalyType.BROKER_FAILURE);
    // One shared override map carries the constructor inputs for all anomaly types built below.
    Map<String, Object> anomalyOverrides = Map.of(
        KAFKA_CRUISE_CONTROL_OBJECT_CONFIG, cruiseControlMock,
        FAILED_BROKERS_OBJECT_CONFIG, failureTimeByBroker,
        BROKER_FAILURES_FIXABLE_CONFIG, true,
        ANOMALY_DETECTION_TIME_MS_OBJECT_CONFIG, detectionTimeMs,
        METRIC_ANOMALY_FIXABLE_OBJECT_CONFIG, false,
        METRIC_ANOMALY_BROKER_ENTITIES_OBJECT_CONFIG, Collections.singletonMap(anomalousBroker, detectionTimeMs),
        FAILED_DISKS_OBJECT_CONFIG, Collections.singletonMap(1, Collections.singletonMap(BAD_LOGDIR, firstFailureTimeMs)),
        SELF_HEALING_TARGET_TOPIC_REPLICATION_FACTOR_CONFIG, SELF_HEALING_TARGET_REPLICATION_FACTOR,
        BAD_TOPICS_BY_DESIRED_RF_CONFIG, Collections.singletonMap(SELF_HEALING_TARGET_REPLICATION_FACTOR, Collections.singleton(TOPIC_REPLICATION_FACTOR_ANOMALY_ENTRY)));
    AnomalyNotificationResult brokerFailureResult = notifier.onBrokerFailure(
        ccConfig.getConfiguredInstance(AnomalyDetectorConfig.BROKER_FAILURES_CLASS_CONFIG, BrokerFailures.class, anomalyOverrides));
    assertEquals(AnomalyNotificationResult.Action.IGNORE, brokerFailureResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.BROKER_FAILURE));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.BROKER_FAILURE));

    // (2) Goal violation: alert is raised, result is IGNORE, no auto-fix.
    notifier.resetAlert(KafkaAnomalyType.GOAL_VIOLATION);
    AnomalyNotificationResult goalViolationResult = notifier.onGoalViolation(
        ccConfig.getConfiguredInstance(AnomalyDetectorConfig.GOAL_VIOLATIONS_CLASS_CONFIG, GoalViolations.class, anomalyOverrides));
    assertEquals(AnomalyNotificationResult.Action.IGNORE, goalViolationResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.GOAL_VIOLATION));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.GOAL_VIOLATION));

    // (3) Metric anomaly: alert is raised, result is IGNORE, no auto-fix.
    notifier.resetAlert(KafkaAnomalyType.METRIC_ANOMALY);
    AnomalyNotificationResult metricAnomalyResult = notifier.onMetricAnomaly(
        ccConfig.getConfiguredInstance(AnomalyDetectorConfig.METRIC_ANOMALY_CLASS_CONFIG, KafkaMetricAnomaly.class, anomalyOverrides));
    assertEquals(AnomalyNotificationResult.Action.IGNORE, metricAnomalyResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.METRIC_ANOMALY));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.METRIC_ANOMALY));

    // (4) Disk failure: alert is raised, result is IGNORE, no auto-fix.
    notifier.resetAlert(KafkaAnomalyType.DISK_FAILURE);
    AnomalyNotificationResult diskFailureResult = notifier.onDiskFailure(
        ccConfig.getConfiguredInstance(AnomalyDetectorConfig.DISK_FAILURES_CLASS_CONFIG, DiskFailures.class, anomalyOverrides));
    assertEquals(AnomalyNotificationResult.Action.IGNORE, diskFailureResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.DISK_FAILURE));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.DISK_FAILURE));

    // (5) Topic anomaly: built and configured directly rather than through the config factory.
    notifier.resetAlert(KafkaAnomalyType.TOPIC_ANOMALY);
    TopicReplicationFactorAnomaly replicationFactorAnomaly = new TopicReplicationFactorAnomaly();
    replicationFactorAnomaly.configure(anomalyOverrides);
    AnomalyNotificationResult topicAnomalyResult = notifier.onTopicAnomaly(replicationFactorAnomaly);
    assertEquals(AnomalyNotificationResult.Action.IGNORE, topicAnomalyResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.TOPIC_ANOMALY));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.TOPIC_ANOMALY));

    EasyMock.verify(cruiseControlMock);
}
Also used : KafkaCruiseControl(com.linkedin.kafka.cruisecontrol.KafkaCruiseControl) MockTime(org.apache.kafka.common.utils.MockTime) Time(org.apache.kafka.common.utils.Time) BrokerEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity) Properties(java.util.Properties) KafkaCruiseControlConfig(com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig) BrokerFailures(com.linkedin.kafka.cruisecontrol.detector.BrokerFailures) TopicReplicationFactorAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly) Test(org.junit.Test)

Example 2 with TopicReplicationFactorAnomaly

Use of com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly in the project cruise-control by linkedin.

The example is the method alertTopicAnomaly of the class AlertaSelfHealingNotifier.

/**
 * Appends Alerta messages describing the given topic anomaly to {@code alertaMessages}.
 *
 * <p>A {@link TopicPartitionSizeAnomaly} produces one message per partition whose value is the
 * partition size formatted as {@code "%f MB"}. A {@link TopicReplicationFactorAnomaly} produces one
 * message per bad-topic entry whose value is the entry's violation ratio formatted as {@code "%.2f"}.
 * Any other topic anomaly produces a single message named after the anomaly type, with no value set.
 *
 * @param anomalyType anomaly type used to look up the alert severity and, in the fallback case,
 *                    to name the message
 * @param localHostname passed as the first argument of every created {@code AlertaMessage}
 * @param alertaMessages list that receives the created messages
 * @param topicAnomaly anomaly to report; its detection time is used for the create time
 */
private void alertTopicAnomaly(AnomalyType anomalyType, final String localHostname, List<AlertaMessage> alertaMessages, TopicAnomaly topicAnomaly) {
    if (topicAnomaly instanceof TopicPartitionSizeAnomaly) {
        // One message per oversized partition, carrying its size.
        Map<TopicPartition, Double> sizeByPartition = ((TopicPartitionSizeAnomaly) topicAnomaly).sizeInMbByPartition();
        for (Map.Entry<TopicPartition, Double> partitionSize : sizeByPartition.entrySet()) {
            String resource = ALERT_MESSAGE_PREFIX_TOPIC_PARTITION_SIZE_ANOMALY + partitionSize.getKey().toString();
            AlertaMessage message = new AlertaMessage(localHostname, resource);
            message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
            message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
            message.setValue(String.format("%f MB", partitionSize.getValue()));
            message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
            alertaMessages.add(message);
        }
        return;
    }

    if (topicAnomaly instanceof TopicReplicationFactorAnomaly) {
        // One message per bad topic; the desired replication factor (map key) is not reported.
        TopicReplicationFactorAnomaly rfAnomaly = (TopicReplicationFactorAnomaly) topicAnomaly;
        for (Set<TopicReplicationFactorAnomalyEntry> badTopics : rfAnomaly.badTopicsByDesiredRF().values()) {
            for (TopicReplicationFactorAnomalyEntry badTopic : badTopics) {
                String resource = ALERT_MESSAGE_PREFIX_TOPIC_REPLICATION_FACTOR_ANOMALY + badTopic.topicName();
                AlertaMessage message = new AlertaMessage(localHostname, resource);
                message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
                message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
                message.setValue(String.format("%.2f", badTopic.violationRatio()));
                message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
                alertaMessages.add(message);
            }
        }
        return;
    }

    // Fallback for any other topic anomaly: a single generic message without a value.
    AlertaMessage genericMessage = new AlertaMessage(localHostname, anomalyType.toString());
    genericMessage.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
    genericMessage.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
    genericMessage.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
    alertaMessages.add(genericMessage);
}
Also used : GoalViolations(com.linkedin.kafka.cruisecontrol.detector.GoalViolations) BrokerEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity) BrokerFailures(com.linkedin.kafka.cruisecontrol.detector.BrokerFailures) LoggerFactory(org.slf4j.LoggerFactory) KafkaMetricAnomaly(com.linkedin.kafka.cruisecontrol.detector.KafkaMetricAnomaly) TopicAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicAnomaly) ArrayList(java.util.ArrayList) InetAddress(java.net.InetAddress) Anomaly(com.linkedin.cruisecontrol.detector.Anomaly) TopicReplicationFactorAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly) DiskFailures(com.linkedin.kafka.cruisecontrol.detector.DiskFailures) Map(java.util.Map) MaintenanceEvent(com.linkedin.kafka.cruisecontrol.detector.MaintenanceEvent) TopicPartition(org.apache.kafka.common.TopicPartition) CruiseControlUtils.utcDateFor(com.linkedin.cruisecontrol.CruiseControlUtils.utcDateFor) Logger(org.slf4j.Logger) CruiseControlUtils(com.linkedin.cruisecontrol.CruiseControlUtils) Time(org.apache.kafka.common.utils.Time) AnomalyType(com.linkedin.cruisecontrol.detector.AnomalyType) Set(java.util.Set) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) List(java.util.List) ChronoUnit(java.time.temporal.ChronoUnit) TopicReplicationFactorAnomalyEntry(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly.TopicReplicationFactorAnomalyEntry) Entry(java.util.Map.Entry) TopicPartitionSizeAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicPartitionSizeAnomaly) Collections(java.util.Collections) TopicReplicationFactorAnomalyEntry(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly.TopicReplicationFactorAnomalyEntry) Entry(java.util.Map.Entry) TopicPartition(org.apache.kafka.common.TopicPartition) TopicPartitionSizeAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicPartitionSizeAnomaly) Map(java.util.Map) 
TopicReplicationFactorAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly) TopicReplicationFactorAnomalyEntry(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly.TopicReplicationFactorAnomalyEntry)

Aggregations

BrokerFailures (com.linkedin.kafka.cruisecontrol.detector.BrokerFailures)2 TopicReplicationFactorAnomaly (com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly)2 BrokerEntity (com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity)2 Time (org.apache.kafka.common.utils.Time)2 CruiseControlUtils (com.linkedin.cruisecontrol.CruiseControlUtils)1 CruiseControlUtils.utcDateFor (com.linkedin.cruisecontrol.CruiseControlUtils.utcDateFor)1 Anomaly (com.linkedin.cruisecontrol.detector.Anomaly)1 AnomalyType (com.linkedin.cruisecontrol.detector.AnomalyType)1 KafkaCruiseControl (com.linkedin.kafka.cruisecontrol.KafkaCruiseControl)1 KafkaCruiseControlConfig (com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig)1 DiskFailures (com.linkedin.kafka.cruisecontrol.detector.DiskFailures)1 GoalViolations (com.linkedin.kafka.cruisecontrol.detector.GoalViolations)1 KafkaMetricAnomaly (com.linkedin.kafka.cruisecontrol.detector.KafkaMetricAnomaly)1 MaintenanceEvent (com.linkedin.kafka.cruisecontrol.detector.MaintenanceEvent)1 TopicAnomaly (com.linkedin.kafka.cruisecontrol.detector.TopicAnomaly)1 TopicPartitionSizeAnomaly (com.linkedin.kafka.cruisecontrol.detector.TopicPartitionSizeAnomaly)1 TopicReplicationFactorAnomalyEntry (com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly.TopicReplicationFactorAnomalyEntry)1 IOException (java.io.IOException)1 InetAddress (java.net.InetAddress)1 UnknownHostException (java.net.UnknownHostException)1