Use of com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly in the cruise-control project by LinkedIn.
Class SelfHealingNotifierTest, method testSelfHealingDisabled.
/**
 * Checks the notifier's behavior when every anomaly-specific self-healing switch is set to
 * {@code "false"} while the general self-healing switch is set to {@code "true"}.
 *
 * <p>For each of the five anomaly types (broker failure, goal violation, metric anomaly, disk
 * failure, topic anomaly) the test asserts that the notification result is
 * {@link AnomalyNotificationResult.Action#IGNORE}, that an alert was recorded, and that no
 * auto-fix was recorded.
 */
@Test
public void testSelfHealingDisabled() {
  final long initialTime = 500L;
  Time clock = new MockTime(initialTime);

  // The mocked KafkaCruiseControl is only expected to hand out its config (one or more times).
  KafkaCruiseControl cruiseControlMock = EasyMock.mock(KafkaCruiseControl.class);
  Properties ccProperties = KafkaCruiseControlUnitTestUtils.getKafkaCruiseControlProperties();
  KafkaCruiseControlConfig ccConfig = new KafkaCruiseControlConfig(ccProperties);
  EasyMock.expect(cruiseControlMock.config()).andReturn(ccConfig).atLeastOnce();
  EasyMock.replay(cruiseControlMock);

  TestingBrokerFailureAutoFixNotifier notifier = new TestingBrokerFailureAutoFixNotifier(clock);
  Map<String, String> notifierSettings = Map.of(
      SelfHealingNotifier.SELF_HEALING_BROKER_FAILURE_ENABLED_CONFIG, "false",
      SelfHealingNotifier.SELF_HEALING_GOAL_VIOLATION_ENABLED_CONFIG, "false",
      SelfHealingNotifier.SELF_HEALING_METRIC_ANOMALY_ENABLED_CONFIG, "false",
      SelfHealingNotifier.SELF_HEALING_DISK_FAILURE_ENABLED_CONFIG, "false",
      SelfHealingNotifier.SELF_HEALING_TOPIC_ANOMALY_ENABLED_CONFIG, "false",
      // The general switch is on, so the assertions below verify that the specific
      // per-anomaly switches take precedence over it.
      SelfHealingNotifier.SELF_HEALING_ENABLED_CONFIG, "true");
  notifier.configure(notifierSettings);

  // Fixture values shared by all five anomaly scenarios.
  final long earlierFailureTime = 200L;
  final long laterFailureTime = 400L;
  Map<Integer, Long> failureTimeByBroker = Map.of(1, earlierFailureTime, 2, laterFailureTime);
  final long detectionTime = 200L;
  final BrokerEntity anomalousBroker = new BrokerEntity("local", 1);

  // Advance the mock clock by the default auto-fix threshold plus the earlier failure time.
  clock.sleep(SelfHealingNotifier.DEFAULT_AUTO_FIX_THRESHOLD_MS + earlierFailureTime);

  // Scenario 1: broker failure.
  notifier.resetAlert(KafkaAnomalyType.BROKER_FAILURE);
  // One override map carries the settings for every anomaly class configured below.
  Map<String, Object> anomalyOverrides = Map.of(
      KAFKA_CRUISE_CONTROL_OBJECT_CONFIG, cruiseControlMock,
      FAILED_BROKERS_OBJECT_CONFIG, failureTimeByBroker,
      BROKER_FAILURES_FIXABLE_CONFIG, true,
      ANOMALY_DETECTION_TIME_MS_OBJECT_CONFIG, detectionTime,
      METRIC_ANOMALY_FIXABLE_OBJECT_CONFIG, false,
      METRIC_ANOMALY_BROKER_ENTITIES_OBJECT_CONFIG, Collections.singletonMap(anomalousBroker, detectionTime),
      FAILED_DISKS_OBJECT_CONFIG, Collections.singletonMap(1, Collections.singletonMap(BAD_LOGDIR, earlierFailureTime)),
      SELF_HEALING_TARGET_TOPIC_REPLICATION_FACTOR_CONFIG, SELF_HEALING_TARGET_REPLICATION_FACTOR,
      BAD_TOPICS_BY_DESIRED_RF_CONFIG, Collections.singletonMap(SELF_HEALING_TARGET_REPLICATION_FACTOR,
                                                                Collections.singleton(TOPIC_REPLICATION_FACTOR_ANOMALY_ENTRY)));
  AnomalyNotificationResult brokerFailureResult = notifier.onBrokerFailure(
      ccConfig.getConfiguredInstance(AnomalyDetectorConfig.BROKER_FAILURES_CLASS_CONFIG, BrokerFailures.class, anomalyOverrides));
  assertEquals(AnomalyNotificationResult.Action.IGNORE, brokerFailureResult.action());
  assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.BROKER_FAILURE));
  assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.BROKER_FAILURE));

  // Scenario 2: goal violation.
  notifier.resetAlert(KafkaAnomalyType.GOAL_VIOLATION);
  AnomalyNotificationResult goalViolationResult = notifier.onGoalViolation(
      ccConfig.getConfiguredInstance(AnomalyDetectorConfig.GOAL_VIOLATIONS_CLASS_CONFIG, GoalViolations.class, anomalyOverrides));
  assertEquals(AnomalyNotificationResult.Action.IGNORE, goalViolationResult.action());
  assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.GOAL_VIOLATION));
  assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.GOAL_VIOLATION));

  // Scenario 3: metric anomaly.
  notifier.resetAlert(KafkaAnomalyType.METRIC_ANOMALY);
  AnomalyNotificationResult metricAnomalyResult = notifier.onMetricAnomaly(
      ccConfig.getConfiguredInstance(AnomalyDetectorConfig.METRIC_ANOMALY_CLASS_CONFIG, KafkaMetricAnomaly.class, anomalyOverrides));
  assertEquals(AnomalyNotificationResult.Action.IGNORE, metricAnomalyResult.action());
  assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.METRIC_ANOMALY));
  assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.METRIC_ANOMALY));

  // Scenario 4: disk failure.
  notifier.resetAlert(KafkaAnomalyType.DISK_FAILURE);
  AnomalyNotificationResult diskFailureResult = notifier.onDiskFailure(
      ccConfig.getConfiguredInstance(AnomalyDetectorConfig.DISK_FAILURES_CLASS_CONFIG, DiskFailures.class, anomalyOverrides));
  assertEquals(AnomalyNotificationResult.Action.IGNORE, diskFailureResult.action());
  assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.DISK_FAILURE));
  assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.DISK_FAILURE));

  // Scenario 5: topic anomaly. Unlike the others, this anomaly is constructed directly and
  // then configured with the same override map.
  notifier.resetAlert(KafkaAnomalyType.TOPIC_ANOMALY);
  TopicReplicationFactorAnomaly replicationFactorAnomaly = new TopicReplicationFactorAnomaly();
  replicationFactorAnomaly.configure(anomalyOverrides);
  AnomalyNotificationResult topicAnomalyResult = notifier.onTopicAnomaly(replicationFactorAnomaly);
  assertEquals(AnomalyNotificationResult.Action.IGNORE, topicAnomalyResult.action());
  assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.TOPIC_ANOMALY));
  assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.TOPIC_ANOMALY));

  EasyMock.verify(cruiseControlMock);
}
Use of com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly in the cruise-control project by LinkedIn.
Class AlertaSelfHealingNotifier, method alertTopicAnomaly.
/**
 * Appends Alerta messages describing the given topic anomaly to {@code alertaMessages}.
 *
 * <ul>
 *   <li>{@link TopicPartitionSizeAnomaly}: one message per entry of {@code sizeInMbByPartition()},
 *       named with the partition-size prefix plus the partition, with the size as its value.</li>
 *   <li>{@link TopicReplicationFactorAnomaly}: one message per entry in each set of
 *       {@code badTopicsByDesiredRF()}, named with the replication-factor prefix plus the topic
 *       name, with the violation ratio (two decimals) as its value.</li>
 *   <li>Any other topic anomaly: a single message named after {@code anomalyType}, with no value
 *       set.</li>
 * </ul>
 *
 * <p>Every message gets the severity looked up for {@code anomalyType}, the
 * {@code PERFORMANCE} group, and a creation time derived from the anomaly's detection time.
 *
 * @param anomalyType anomaly type used for the severity lookup and the fallback message name
 * @param localHostname first constructor argument of every {@link AlertaMessage} created here
 * @param alertaMessages list that receives the created messages; it is modified in place
 * @param topicAnomaly the anomaly to report
 */
private void alertTopicAnomaly(AnomalyType anomalyType, final String localHostname, List<AlertaMessage> alertaMessages, TopicAnomaly topicAnomaly) {
  if (topicAnomaly instanceof TopicPartitionSizeAnomaly) {
    Map<TopicPartition, Double> sizeInMbByPartition = ((TopicPartitionSizeAnomaly) topicAnomaly).sizeInMbByPartition();
    for (Map.Entry<TopicPartition, Double> partitionAndSize : sizeInMbByPartition.entrySet()) {
      AlertaMessage message = new AlertaMessage(localHostname,
                                                ALERT_MESSAGE_PREFIX_TOPIC_PARTITION_SIZE_ANOMALY + partitionAndSize.getKey().toString());
      message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
      message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
      message.setValue(String.format("%f MB", partitionAndSize.getValue()));
      message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
      alertaMessages.add(message);
    }
    return;
  }

  if (topicAnomaly instanceof TopicReplicationFactorAnomaly) {
    Map<Short, Set<TopicReplicationFactorAnomalyEntry>> badTopicsByDesiredRF =
        ((TopicReplicationFactorAnomaly) topicAnomaly).badTopicsByDesiredRF();
    // The desired replication factor (the map key) is not part of the message, so only the values are walked.
    for (Set<TopicReplicationFactorAnomalyEntry> badTopics : badTopicsByDesiredRF.values()) {
      for (TopicReplicationFactorAnomalyEntry badTopic : badTopics) {
        AlertaMessage message = new AlertaMessage(localHostname,
                                                  ALERT_MESSAGE_PREFIX_TOPIC_REPLICATION_FACTOR_ANOMALY + badTopic.topicName());
        message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
        message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
        message.setValue(String.format("%.2f", badTopic.violationRatio()));
        message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
        alertaMessages.add(message);
      }
    }
    return;
  }

  // Fallback for any other topic anomaly: one generic message without a value.
  AlertaMessage message = new AlertaMessage(localHostname, anomalyType.toString());
  message.setSeverity(NotifierUtils.getAlertSeverity(anomalyType).toString());
  message.setGroup(AlertaAlertGroup.PERFORMANCE.toString());
  message.setCreateTime(CruiseControlUtils.utcDateFor(topicAnomaly.detectionTimeMs(), 3, ChronoUnit.SECONDS));
  alertaMessages.add(message);
}
Aggregations