Search in sources :

Example 6 with BrokerEntity

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity in project cruise-control by linkedin.

In the class SlowBrokerFinder, the method detectMetricAnomaliesFromHistory:

/**
 * Compares each broker's current metric value with that broker's own history and collects the brokers
 * whose current value is above the configured historical percentile scaled by the configured margin.
 *
 * <p>Brokers that have no history entry, or whose history is not considered sufficient by
 * {@code isDataSufficient}, are skipped.
 *
 * @param historicalValue historical metric values keyed by broker.
 * @param currentValue current metric value keyed by broker; every entry is examined.
 * @param detectedMetricAnomalies output set; brokers found anomalous are added to it.
 */
private void detectMetricAnomaliesFromHistory(Map<BrokerEntity, List<Double>> historicalValue, Map<BrokerEntity, Double> currentValue, Set<BrokerEntity> detectedMetricAnomalies) {
    for (Map.Entry<BrokerEntity, Double> current : currentValue.entrySet()) {
        BrokerEntity broker = current.getKey();
        List<Double> history = historicalValue.get(broker);
        // Nothing to compare against without (enough) history for this broker.
        if (history == null || !isDataSufficient(history.size(), _metricHistoryPercentile, _metricHistoryPercentile)) {
            continue;
        }
        double[] samples = history.stream().mapToDouble(sample -> sample).toArray();
        _percentile.setData(samples);
        double threshold = _percentile.evaluate(_metricHistoryPercentile) * _metricHistoryMargin;
        if (current.getValue() > threshold) {
            detectedMetricAnomalies.add(broker);
        }
    }
}
Also used : BrokerEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity) MetricAnomalyFinder(com.linkedin.cruisecontrol.detector.metricanomaly.MetricAnomalyFinder) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) METRIC_ANOMALY_DESCRIPTION_OBJECT_CONFIG(com.linkedin.kafka.cruisecontrol.detector.MetricAnomalyDetector.METRIC_ANOMALY_DESCRIPTION_OBJECT_CONFIG) KafkaMetricDef(com.linkedin.kafka.cruisecontrol.monitor.metricdefinition.KafkaMetricDef) Map(java.util.Map) METRIC_ANOMALY_BROKER_ENTITIES_OBJECT_CONFIG(com.linkedin.kafka.cruisecontrol.detector.MetricAnomalyDetector.METRIC_ANOMALY_BROKER_ENTITIES_OBJECT_CONFIG) AggregatedMetricValues(com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues) KafkaCruiseControl(com.linkedin.kafka.cruisecontrol.KafkaCruiseControl) KAFKA_CRUISE_CONTROL_OBJECT_CONFIG(com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorUtils.KAFKA_CRUISE_CONTROL_OBJECT_CONFIG) METRIC_ANOMALY_FIXABLE_OBJECT_CONFIG(com.linkedin.kafka.cruisecontrol.detector.MetricAnomalyDetector.METRIC_ANOMALY_FIXABLE_OBJECT_CONFIG) CruiseControlUtils.utcDateFor(com.linkedin.cruisecontrol.CruiseControlUtils.utcDateFor) Logger(org.slf4j.Logger) ValuesAndExtrapolations(com.linkedin.cruisecontrol.monitor.sampling.aggregator.ValuesAndExtrapolations) MetricAnomaly(com.linkedin.cruisecontrol.detector.metricanomaly.MetricAnomaly) Predicate(java.util.function.Predicate) Collection(java.util.Collection) MetricAnomalyType(com.linkedin.cruisecontrol.detector.metricanomaly.MetricAnomalyType) AnomalyDetectorConfig(com.linkedin.kafka.cruisecontrol.config.constants.AnomalyDetectorConfig) Set(java.util.Set) ANOMALY_DETECTION_TIME_MS_OBJECT_CONFIG(com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorUtils.ANOMALY_DETECTION_TIME_MS_OBJECT_CONFIG) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) 
Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) AnomalyUtils.parseAndGetConfig(com.linkedin.kafka.cruisecontrol.detector.AnomalyUtils.parseAndGetConfig) Collections(java.util.Collections) PercentileMetricAnomalyFinderUtils.isDataSufficient(com.linkedin.cruisecontrol.detector.metricanomaly.PercentileMetricAnomalyFinderUtils.isDataSufficient) BrokerEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity) HashMap(java.util.HashMap) Map(java.util.Map)

Example 7 with BrokerEntity

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity in project cruise-control by linkedin.

In the class AnomalyDetectorManagerTest, the method testFixAnomaly:

/**
 * Drives an {@link AnomalyDetectorManager} assembled from EasyMock mocks and checks that exactly one
 * self-healing fix is started for the given anomaly type.
 *
 * <p>Flow: create the mocks, record the calls expected for {@code anomalyType}, replay the mocks, put
 * anomalies in the priority queue, start detection, spin until a fix has started, shut the manager down,
 * and finally assert on the fix counters and on the recent-anomaly lists per type.
 *
 * @param anomalyType anomaly type under test; the branches below cover GOAL_VIOLATION, DISK_FAILURE,
 *                    METRIC_ANOMALY and TOPIC_ANOMALY.
 * @throws InterruptedException if waiting for the executor service to terminate is interrupted.
 * @throws KafkaCruiseControlException presumably declared by the mocked KafkaCruiseControl methods called
 *                                     while recording expectations — confirm.
 * @throws NotEnoughValidWindowsException presumably declared by the mocked KafkaCruiseControl methods called
 *                                        while recording expectations — confirm.
 * @throws TimeoutException presumably declared by the mocked KafkaCruiseControl methods called while
 *                          recording expectations — confirm.
 */
private void testFixAnomaly(AnomalyType anomalyType) throws InterruptedException, KafkaCruiseControlException, NotEnoughValidWindowsException, TimeoutException {
    // Queue handed to the manager; its ordering is defined by anomalyComparator().
    PriorityBlockingQueue<Anomaly> anomalies = new PriorityBlockingQueue<>(ANOMALY_DETECTOR_INITIAL_QUEUE_SIZE, anomalyComparator());
    AnomalyNotifier mockAnomalyNotifier = EasyMock.mock(AnomalyNotifier.class);
    BrokerFailureDetector mockBrokerFailureDetector = EasyMock.createNiceMock(BrokerFailureDetector.class);
    GoalViolationDetector mockGoalViolationDetector = EasyMock.createNiceMock(GoalViolationDetector.class);
    MetricAnomalyDetector mockMetricAnomalyDetector = EasyMock.createNiceMock(MetricAnomalyDetector.class);
    TopicAnomalyDetector mockTopicAnomalyDetector = EasyMock.createNiceMock(TopicAnomalyDetector.class);
    MaintenanceEventDetector mockMaintenanceEventDetector = EasyMock.createNiceMock(MaintenanceEventDetector.class);
    DiskFailureDetector mockDiskFailureDetector = EasyMock.createNiceMock(DiskFailureDetector.class);
    ScheduledExecutorService mockDetectorScheduler = EasyMock.mock(ScheduledExecutorService.class);
    // A real single-threaded executor is used alongside the mocked scheduler.
    ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor();
    KafkaCruiseControl mockKafkaCruiseControl = EasyMock.mock(KafkaCruiseControl.class);
    // The completeness requirements mock is recorded and replayed on its own, before the other mocks.
    ModelCompletenessRequirements mockModelCompletenessRequirements = EasyMock.mock(ModelCompletenessRequirements.class);
    EasyMock.expect(mockModelCompletenessRequirements.weaker(EasyMock.anyObject())).andReturn(mockModelCompletenessRequirements);
    EasyMock.expect(mockModelCompletenessRequirements.minRequiredNumWindows()).andReturn(0);
    EasyMock.replay(mockModelCompletenessRequirements);
    OptimizerResult mockOptimizerResult = EasyMock.mock(OptimizerResult.class);
    BrokerStats mockBrokerStats = EasyMock.mock(BrokerStats.class);
    // Use SlowBrokers as the configured metric anomaly class.
    Properties props = KafkaCruiseControlUnitTestUtils.getKafkaCruiseControlProperties();
    props.setProperty(AnomalyDetectorConfig.METRIC_ANOMALY_CLASS_CONFIG, SlowBrokers.class.getName());
    KafkaCruiseControlConfig kafkaCruiseControlConfig = new KafkaCruiseControlConfig(props);
    // Expectations shared by every anomaly type.
    EasyMock.expect(mockKafkaCruiseControl.config()).andReturn(kafkaCruiseControlConfig).times(1, 17);
    mockKafkaCruiseControl.sanityCheckDryRun(EasyMock.eq(SELF_HEALING_DRYRUN), EasyMock.eq(false));
    EasyMock.expect(mockKafkaCruiseControl.modelCompletenessRequirements(EasyMock.anyObject())).andReturn(mockModelCompletenessRequirements).times(0, 2);
    EasyMock.expect(mockKafkaCruiseControl.getLoadMonitorTaskRunnerState()).andReturn(LoadMonitorTaskRunner.LoadMonitorTaskRunnerState.RUNNING).times(1, 2);
    startRunnableDetectors(mockDetectorScheduler, mockGoalViolationDetector, mockMetricAnomalyDetector, mockDiskFailureDetector, mockTopicAnomalyDetector, mockMaintenanceEventDetector, executorService);
    shutdownDetector(mockDetectorScheduler, executorService);
    // The following state are used to test the delayed check when executor is idle.
    EasyMock.expect(mockKafkaCruiseControl.executionState()).andReturn(ExecutorState.State.NO_TASK_IN_PROGRESS);
    EasyMock.expect(mockAnomalyNotifier.selfHealingEnabledRatio()).andReturn(MOCK_SELF_HEALING_ENABLED_RATIO);
    // Record the KafkaCruiseControl and notifier calls expected for the anomaly type under test.
    // In every branch the notifier is told to answer with AnomalyNotificationResult.fix().
    if (anomalyType == KafkaAnomalyType.GOAL_VIOLATION) {
        mockKafkaCruiseControl.sanityCheckDryRun(EasyMock.eq(true), EasyMock.eq(false));
        EasyMock.expect(mockKafkaCruiseControl.ignoreProposalCache(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.eq(SELF_HEALING_EXCLUDED_TOPICS), EasyMock.eq(AnomalyDetectorConfig.DEFAULT_SELF_HEALING_EXCLUDE_RECENT_BROKERS_CONFIG), EasyMock.eq(SELF_HEALING_IGNORE_PROPOSAL_CACHE), EasyMock.eq(true), EasyMock.eq(SELF_HEALING_DESTINATION_BROKER_IDS), EasyMock.eq(SELF_HEALING_IS_REBALANCE_DISK_MODE))).andReturn(false);
        EasyMock.expect(mockKafkaCruiseControl.getProposals(EasyMock.anyObject(), EasyMock.eq(AnomalyDetectorConfig.DEFAULT_ANOMALY_DETECTION_ALLOW_CAPACITY_ESTIMATION_CONFIG))).andReturn(mockOptimizerResult);
        mockKafkaCruiseControl.executeProposals(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.eq(false), EasyMock.eq(SELF_HEALING_CONCURRENT_MOVEMENTS), EasyMock.anyObject(), EasyMock.eq(SELF_HEALING_CONCURRENT_MOVEMENTS), EasyMock.eq(SELF_HEALING_CONCURRENT_MOVEMENTS), EasyMock.eq(SELF_HEALING_EXECUTION_PROGRESS_CHECK_INTERVAL_MS), EasyMock.eq(SELF_HEALING_REPLICA_MOVEMENT_STRATEGY), EasyMock.eq(null), EasyMock.eq(false), EasyMock.anyString(), EasyMock.eq(false));
        EasyMock.expect(mockAnomalyNotifier.onGoalViolation(EasyMock.isA(GoalViolations.class))).andReturn(AnomalyNotificationResult.fix());
    } else if (anomalyType == KafkaAnomalyType.DISK_FAILURE) {
        // The cluster model returned by the mock is a single broker with a bad disk.
        ClusterModel singleBrokerWithBadDisk = singleBrokerWithBadDisk();
        EasyMock.expect(mockKafkaCruiseControl.clusterModel(EasyMock.anyObject(), EasyMock.eq(true), EasyMock.anyObject())).andReturn(singleBrokerWithBadDisk);
        EasyMock.expect(mockKafkaCruiseControl.dropRecentBrokers(EasyMock.eq(Collections.emptySet()), EasyMock.eq(true))).andReturn(false);
        EasyMock.expect(mockKafkaCruiseControl.dropRecentBrokers(EasyMock.eq(Collections.emptySet()), EasyMock.eq(false))).andReturn(false);
        ExecutorState executorState = ExecutorState.noTaskInProgress(Collections.emptySet(), Collections.emptySet());
        EasyMock.expect(mockKafkaCruiseControl.executorState()).andReturn(executorState).once();
        EasyMock.expect(mockKafkaCruiseControl.excludedTopics(singleBrokerWithBadDisk, SELF_HEALING_EXCLUDED_TOPICS)).andReturn(Collections.emptySet());
        EasyMock.expect(mockKafkaCruiseControl.optimizations(EasyMock.eq(singleBrokerWithBadDisk), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.eq(null), EasyMock.anyObject())).andReturn(mockOptimizerResult);
        mockKafkaCruiseControl.executeProposals(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.eq(false), EasyMock.eq(SELF_HEALING_CONCURRENT_MOVEMENTS), EasyMock.anyObject(), EasyMock.eq(SELF_HEALING_CONCURRENT_MOVEMENTS), EasyMock.eq(SELF_HEALING_CONCURRENT_MOVEMENTS), EasyMock.eq(SELF_HEALING_EXECUTION_PROGRESS_CHECK_INTERVAL_MS), EasyMock.eq(SELF_HEALING_REPLICA_MOVEMENT_STRATEGY), EasyMock.eq(null), EasyMock.eq(false), EasyMock.anyString(), EasyMock.eq(false));
        EasyMock.expect(mockKafkaCruiseControl.acquireForModelGeneration(EasyMock.anyObject())).andReturn(null);
        EasyMock.expect(mockAnomalyNotifier.onDiskFailure(EasyMock.isA(DiskFailures.class))).andReturn(AnomalyNotificationResult.fix());
    } else if (anomalyType == KafkaAnomalyType.METRIC_ANOMALY) {
        // This branch expects executeDemotion rather than executeProposals.
        ClusterModel smallCluster = smallClusterModel(TestConstants.BROKER_CAPACITY);
        EasyMock.expect(mockKafkaCruiseControl.clusterModel(EasyMock.anyObject(), EasyMock.eq(true), EasyMock.anyObject())).andReturn(smallCluster);
        EasyMock.expect(mockKafkaCruiseControl.kafkaCluster()).andReturn(Cluster.empty());
        EasyMock.expect(mockKafkaCruiseControl.acquireForModelGeneration(EasyMock.anyObject())).andReturn(null);
        mockKafkaCruiseControl.sanityCheckBrokerPresence(EasyMock.anyObject());
        EasyMock.expect(mockKafkaCruiseControl.dropRecentBrokers(EasyMock.eq(Collections.emptySet()), EasyMock.eq(true))).andReturn(false);
        EasyMock.expect(mockKafkaCruiseControl.dropRecentBrokers(EasyMock.eq(Collections.emptySet()), EasyMock.eq(false))).andReturn(false);
        ExecutorState executorState = ExecutorState.noTaskInProgress(Collections.emptySet(), Collections.emptySet());
        EasyMock.expect(mockKafkaCruiseControl.executorState()).andReturn(executorState).once();
        EasyMock.expect(mockKafkaCruiseControl.excludedTopics(smallCluster, SELF_HEALING_EXCLUDED_TOPICS)).andReturn(Collections.emptySet());
        EasyMock.expect(mockKafkaCruiseControl.optimizations(EasyMock.eq(smallCluster), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.eq(null), EasyMock.anyObject())).andReturn(mockOptimizerResult);
        mockKafkaCruiseControl.executeDemotion(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.eq(SELF_HEALING_CONCURRENT_MOVEMENTS), EasyMock.eq(smallCluster.brokers().size()), EasyMock.eq(SELF_HEALING_EXECUTION_PROGRESS_CHECK_INTERVAL_MS), EasyMock.eq(SELF_HEALING_REPLICA_MOVEMENT_STRATEGY), EasyMock.eq(null), EasyMock.eq(false), EasyMock.anyString());
        EasyMock.expect(mockAnomalyNotifier.onMetricAnomaly(EasyMock.isA(SlowBrokers.class))).andReturn(AnomalyNotificationResult.fix());
    } else if (anomalyType == KafkaAnomalyType.TOPIC_ANOMALY) {
        // Here kafkaCluster() is derived from the same unbalanced cluster model that clusterModel() returns.
        ClusterModel clusterModel = unbalanced();
        EasyMock.expect(mockKafkaCruiseControl.clusterModel(EasyMock.anyObject(), EasyMock.eq(true), EasyMock.anyObject())).andReturn(clusterModel);
        EasyMock.expect(mockKafkaCruiseControl.kafkaCluster()).andReturn(generateClusterFromClusterModel(clusterModel));
        EasyMock.expect(mockKafkaCruiseControl.acquireForModelGeneration(EasyMock.anyObject())).andReturn(null);
        EasyMock.expect(mockKafkaCruiseControl.dropRecentBrokers(EasyMock.eq(Collections.emptySet()), EasyMock.eq(true))).andReturn(false);
        EasyMock.expect(mockKafkaCruiseControl.dropRecentBrokers(EasyMock.eq(Collections.emptySet()), EasyMock.eq(false))).andReturn(false);
        ExecutorState executorState = ExecutorState.noTaskInProgress(Collections.emptySet(), Collections.emptySet());
        EasyMock.expect(mockKafkaCruiseControl.executorState()).andReturn(executorState).once();
        EasyMock.expect(mockKafkaCruiseControl.excludedTopics(clusterModel, SELF_HEALING_EXCLUDED_TOPICS)).andReturn(Collections.emptySet());
        EasyMock.expect(mockKafkaCruiseControl.optimizations(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject())).andReturn(mockOptimizerResult);
        mockKafkaCruiseControl.executeProposals(EasyMock.anyObject(), EasyMock.eq(Collections.emptySet()), EasyMock.eq(false), EasyMock.eq(SELF_HEALING_CONCURRENT_MOVEMENTS), EasyMock.anyObject(), EasyMock.eq(0), EasyMock.eq(SELF_HEALING_CONCURRENT_MOVEMENTS), EasyMock.eq(SELF_HEALING_EXECUTION_PROGRESS_CHECK_INTERVAL_MS), EasyMock.eq(SELF_HEALING_REPLICA_MOVEMENT_STRATEGY), EasyMock.eq(null), EasyMock.eq(false), EasyMock.anyString(), EasyMock.eq(true));
        EasyMock.expect(mockAnomalyNotifier.onTopicAnomaly(EasyMock.isA(TopicAnomaly.class))).andReturn(AnomalyNotificationResult.fix());
    }
    EasyMock.expect(mockKafkaCruiseControl.meetCompletenessRequirements(Collections.emptyList())).andReturn(true);
    EasyMock.expect(mockDetectorScheduler.schedule(EasyMock.isA(Runnable.class), EasyMock.eq(0L), EasyMock.eq(TimeUnit.MILLISECONDS))).andReturn(null);
    // Set generating proposals for execution.
    mockKafkaCruiseControl.setGeneratingProposalsForExecution(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.eq(false));
    // Recording is finished: switch the mocks to replay mode before building the manager.
    replayCommonMocks(mockAnomalyNotifier, mockBrokerFailureDetector, mockGoalViolationDetector, mockMetricAnomalyDetector, mockTopicAnomalyDetector, mockMaintenanceEventDetector, mockDiskFailureDetector, mockDetectorScheduler, mockKafkaCruiseControl);
    expectAndReplayFixMocks(mockOptimizerResult, mockBrokerStats);
    AnomalyDetectorManager anomalyDetectorManager = new AnomalyDetectorManager(anomalies, MOCK_ANOMALY_DETECTION_INTERVAL_MS, mockKafkaCruiseControl, mockAnomalyNotifier, mockGoalViolationDetector, mockBrokerFailureDetector, mockMetricAnomalyDetector, mockDiskFailureDetector, mockTopicAnomalyDetector, mockMaintenanceEventDetector, mockDetectorScheduler);
    try {
        // The same override map is reused (and extended) for every anomaly instance created below.
        Map<String, Object> parameterConfigOverrides = new HashMap<>();
        parameterConfigOverrides.put(KAFKA_CRUISE_CONTROL_OBJECT_CONFIG, mockKafkaCruiseControl);
        parameterConfigOverrides.put(ANOMALY_DETECTION_TIME_MS_OBJECT_CONFIG, 100L);
        // Enqueue anomalies. The conditions are cumulative: a GOAL_VIOLATION run queues a goal violation and a
        // topic anomaly, a METRIC_ANOMALY run additionally queues slow brokers, a DISK_FAILURE run queues all
        // four, and a TOPIC_ANOMALY run queues only the topic anomaly.
        // NOTE(review): presumably the extra anomalies rank lower under anomalyComparator(), so only the type
        // under test is fixed (see the recentAnomaliesByType assertions below) — confirm.
        if (anomalyType == KafkaAnomalyType.GOAL_VIOLATION || anomalyType == KafkaAnomalyType.METRIC_ANOMALY || anomalyType == KafkaAnomalyType.DISK_FAILURE) {
            GoalViolations violations = kafkaCruiseControlConfig.getConfiguredInstance(AnomalyDetectorConfig.GOAL_VIOLATIONS_CLASS_CONFIG, GoalViolations.class, parameterConfigOverrides);
            // The reason text lists no fixable goal before the violation is added, and the goal name afterwards.
            assertTrue(violations.reasonSupplier().get().contains(String.format("%s: {}", GoalViolations.FIXABLE_GOAL_VIOLATIONS)));
            violations.addViolation("RackAwareGoal", true);
            assertTrue(violations.reasonSupplier().get().contains(String.format("%s: {RackAwareGoal}", GoalViolations.FIXABLE_GOAL_VIOLATIONS)));
            anomalies.add(violations);
        }
        if (anomalyType == KafkaAnomalyType.METRIC_ANOMALY || anomalyType == KafkaAnomalyType.DISK_FAILURE) {
            Map<BrokerEntity, Long> detectedSlowBrokers = Collections.singletonMap(new BrokerEntity("", 0), 100L);
            parameterConfigOverrides.put(METRIC_ANOMALY_BROKER_ENTITIES_OBJECT_CONFIG, detectedSlowBrokers);
            parameterConfigOverrides.put(REMOVE_SLOW_BROKER_CONFIG, false);
            parameterConfigOverrides.put(METRIC_ANOMALY_FIXABLE_OBJECT_CONFIG, true);
            SlowBrokers slowBrokers = kafkaCruiseControlConfig.getConfiguredInstance(AnomalyDetectorConfig.METRIC_ANOMALY_CLASS_CONFIG, SlowBrokers.class, parameterConfigOverrides);
            anomalies.add(slowBrokers);
        }
        if (anomalyType == KafkaAnomalyType.DISK_FAILURE) {
            Map<Integer, Map<String, Long>> failedDisksByBroker = Collections.singletonMap(0, Collections.singletonMap("tmp", 100L));
            parameterConfigOverrides.put(FAILED_DISKS_OBJECT_CONFIG, failedDisksByBroker);
            DiskFailures diskFailures = kafkaCruiseControlConfig.getConfiguredInstance(AnomalyDetectorConfig.DISK_FAILURES_CLASS_CONFIG, DiskFailures.class, parameterConfigOverrides);
            anomalies.add(diskFailures);
        }
        if (anomalyType == KafkaAnomalyType.GOAL_VIOLATION || anomalyType == KafkaAnomalyType.METRIC_ANOMALY || anomalyType == KafkaAnomalyType.DISK_FAILURE || anomalyType == KafkaAnomalyType.TOPIC_ANOMALY) {
            parameterConfigOverrides.put(BAD_TOPICS_BY_DESIRED_RF_CONFIG, Collections.singletonMap((short) 2, Collections.singleton(TOPIC_REPLICATION_FACTOR_ANOMALY_ENTRY)));
            parameterConfigOverrides.put(SELF_HEALING_TARGET_TOPIC_REPLICATION_FACTOR_CONFIG, (short) 2);
            // Unlike the anomalies above, the topic anomaly is constructed directly and then configured.
            TopicAnomaly topicAnomaly = new TopicReplicationFactorAnomaly();
            topicAnomaly.configure(parameterConfigOverrides);
            anomalies.add(topicAnomaly);
        }
        anomalyDetectorManager.startDetection();
        // Busy-wait: the loop body is empty and there is no sleep or timeout, so the test spins until the
        // manager reports a started fix.
        while (anomalyDetectorManager.numSelfHealingStarted() < 1) {
        // Wait for the anomaly to be fixed before attempting to shutdown the anomaly detector.
        }
        anomalyDetectorManager.shutdown();
        // Exactly one fix was started, and none was checked with delay.
        assertEquals(1, anomalyDetectorManager.numSelfHealingStarted());
        assertEquals(0, anomalyDetectorManager.numCheckedWithDelay());
        assertTrue(executorService.awaitTermination(MOCK_ANOMALY_DETECTOR_SHUTDOWN_MS, TimeUnit.MILLISECONDS));
        AnomalyDetectorState anomalyDetectorState = anomalyDetectorManager.anomalyDetectorState();
        assertEquals((long) anomalyDetectorState.metrics().get(NUM_SELF_HEALING_STARTED), 1L);
        // Only the anomaly type under test is expected among the recent anomalies.
        assertEquals(anomalyDetectorState.recentAnomaliesByType().get(KafkaAnomalyType.BROKER_FAILURE).size(), 0);
        assertEquals(anomalyDetectorState.recentAnomaliesByType().get(KafkaAnomalyType.GOAL_VIOLATION).size(), anomalyType == KafkaAnomalyType.GOAL_VIOLATION ? 1 : 0);
        assertEquals(anomalyDetectorState.recentAnomaliesByType().get(KafkaAnomalyType.DISK_FAILURE).size(), anomalyType == KafkaAnomalyType.DISK_FAILURE ? 1 : 0);
        assertEquals(anomalyDetectorState.recentAnomaliesByType().get(KafkaAnomalyType.METRIC_ANOMALY).size(), anomalyType == KafkaAnomalyType.METRIC_ANOMALY ? 1 : 0);
        assertEquals(anomalyDetectorState.recentAnomaliesByType().get(KafkaAnomalyType.TOPIC_ANOMALY).size(), anomalyType == KafkaAnomalyType.TOPIC_ANOMALY ? 1 : 0);
        EasyMock.verify(mockAnomalyNotifier, mockDetectorScheduler, mockKafkaCruiseControl, mockBrokerFailureDetector, mockGoalViolationDetector, mockMetricAnomalyDetector, mockTopicAnomalyDetector, mockMaintenanceEventDetector, mockDiskFailureDetector);
    } finally {
        // Always stop the real executor, even when an assertion above fails.
        executorService.shutdown();
    }
    // Same verification as the last statement of the try block, repeated once the executor has been shut down.
    EasyMock.verify(mockAnomalyNotifier, mockDetectorScheduler, mockKafkaCruiseControl, mockBrokerFailureDetector, mockGoalViolationDetector, mockMetricAnomalyDetector, mockTopicAnomalyDetector, mockMaintenanceEventDetector, mockDiskFailureDetector);
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) BrokerEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity) Properties(java.util.Properties) KafkaCruiseControlConfig(com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig) Anomaly(com.linkedin.cruisecontrol.detector.Anomaly) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AnomalyNotifier(com.linkedin.kafka.cruisecontrol.detector.notifier.AnomalyNotifier) KafkaCruiseControl(com.linkedin.kafka.cruisecontrol.KafkaCruiseControl) PriorityBlockingQueue(java.util.concurrent.PriorityBlockingQueue) DeterministicCluster.smallClusterModel(com.linkedin.kafka.cruisecontrol.common.DeterministicCluster.smallClusterModel) ClusterModel(com.linkedin.kafka.cruisecontrol.model.ClusterModel) DeterministicCluster.generateClusterFromClusterModel(com.linkedin.kafka.cruisecontrol.common.DeterministicCluster.generateClusterFromClusterModel) ExecutorState(com.linkedin.kafka.cruisecontrol.executor.ExecutorState) OptimizerResult(com.linkedin.kafka.cruisecontrol.analyzer.OptimizerResult) ModelCompletenessRequirements(com.linkedin.kafka.cruisecontrol.monitor.ModelCompletenessRequirements) BrokerStats(com.linkedin.kafka.cruisecontrol.servlet.response.stats.BrokerStats) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 8 with BrokerEntity

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity in project cruise-control by linkedin.

In the class SelfHealingNotifierTest, the method testSelfHealingDisabled:

/**
 * Verifies that, with self-healing explicitly switched off for every anomaly type, the notifier still
 * raises an alert for each type but answers IGNORE and never triggers an automatic fix, even though the
 * general self-healing switch is set to "true".
 */
@Test
public void testSelfHealingDisabled() {
    final long initialTimeMs = 500L;
    Time clock = new MockTime(initialTimeMs);
    KafkaCruiseControl cruiseControl = EasyMock.mock(KafkaCruiseControl.class);
    Properties testProperties = KafkaCruiseControlUnitTestUtils.getKafkaCruiseControlProperties();
    KafkaCruiseControlConfig config = new KafkaCruiseControlConfig(testProperties);
    EasyMock.expect(cruiseControl.config()).andReturn(config).atLeastOnce();
    EasyMock.replay(cruiseControl);

    TestingBrokerFailureAutoFixNotifier notifier = new TestingBrokerFailureAutoFixNotifier(clock);
    Map<String, String> notifierConfig = Map.of(
        SelfHealingNotifier.SELF_HEALING_BROKER_FAILURE_ENABLED_CONFIG, "false",
        SelfHealingNotifier.SELF_HEALING_GOAL_VIOLATION_ENABLED_CONFIG, "false",
        SelfHealingNotifier.SELF_HEALING_METRIC_ANOMALY_ENABLED_CONFIG, "false",
        SelfHealingNotifier.SELF_HEALING_DISK_FAILURE_ENABLED_CONFIG, "false",
        SelfHealingNotifier.SELF_HEALING_TOPIC_ANOMALY_ENABLED_CONFIG, "false",
        // The general switch is on so that the type-specific "false" settings must win over it.
        SelfHealingNotifier.SELF_HEALING_ENABLED_CONFIG, "true");
    notifier.configure(notifierConfig);

    // Step 1: broker failure.
    final long firstFailureTimeMs = 200L;
    final long secondFailureTimeMs = 400L;
    Map<Integer, Long> failedBrokers = Map.of(1, firstFailureTimeMs, 2, secondFailureTimeMs);
    final long detectionTimeMs = 200L;
    final BrokerEntity anomalousBroker = new BrokerEntity("local", 1);
    clock.sleep(SelfHealingNotifier.DEFAULT_AUTO_FIX_THRESHOLD_MS + firstFailureTimeMs);
    notifier.resetAlert(KafkaAnomalyType.BROKER_FAILURE);
    // One set of overrides feeds every anomaly instance created in this test.
    Map<String, Object> overrides = Map.of(
        KAFKA_CRUISE_CONTROL_OBJECT_CONFIG, cruiseControl,
        FAILED_BROKERS_OBJECT_CONFIG, failedBrokers,
        BROKER_FAILURES_FIXABLE_CONFIG, true,
        ANOMALY_DETECTION_TIME_MS_OBJECT_CONFIG, detectionTimeMs,
        METRIC_ANOMALY_FIXABLE_OBJECT_CONFIG, false,
        METRIC_ANOMALY_BROKER_ENTITIES_OBJECT_CONFIG, Collections.singletonMap(anomalousBroker, detectionTimeMs),
        FAILED_DISKS_OBJECT_CONFIG, Collections.singletonMap(1, Collections.singletonMap(BAD_LOGDIR, firstFailureTimeMs)),
        SELF_HEALING_TARGET_TOPIC_REPLICATION_FACTOR_CONFIG, SELF_HEALING_TARGET_REPLICATION_FACTOR,
        BAD_TOPICS_BY_DESIRED_RF_CONFIG, Collections.singletonMap(SELF_HEALING_TARGET_REPLICATION_FACTOR, Collections.singleton(TOPIC_REPLICATION_FACTOR_ANOMALY_ENTRY)));
    BrokerFailures brokerFailures = config.getConfiguredInstance(AnomalyDetectorConfig.BROKER_FAILURES_CLASS_CONFIG, BrokerFailures.class, overrides);
    AnomalyNotificationResult brokerFailureResult = notifier.onBrokerFailure(brokerFailures);
    assertEquals(AnomalyNotificationResult.Action.IGNORE, brokerFailureResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.BROKER_FAILURE));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.BROKER_FAILURE));

    // Step 2: goal violation.
    notifier.resetAlert(KafkaAnomalyType.GOAL_VIOLATION);
    GoalViolations goalViolations = config.getConfiguredInstance(AnomalyDetectorConfig.GOAL_VIOLATIONS_CLASS_CONFIG, GoalViolations.class, overrides);
    AnomalyNotificationResult goalViolationResult = notifier.onGoalViolation(goalViolations);
    assertEquals(AnomalyNotificationResult.Action.IGNORE, goalViolationResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.GOAL_VIOLATION));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.GOAL_VIOLATION));

    // Step 3: metric anomaly.
    notifier.resetAlert(KafkaAnomalyType.METRIC_ANOMALY);
    KafkaMetricAnomaly metricAnomaly = config.getConfiguredInstance(AnomalyDetectorConfig.METRIC_ANOMALY_CLASS_CONFIG, KafkaMetricAnomaly.class, overrides);
    AnomalyNotificationResult metricAnomalyResult = notifier.onMetricAnomaly(metricAnomaly);
    assertEquals(AnomalyNotificationResult.Action.IGNORE, metricAnomalyResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.METRIC_ANOMALY));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.METRIC_ANOMALY));

    // Step 4: disk failure.
    notifier.resetAlert(KafkaAnomalyType.DISK_FAILURE);
    DiskFailures diskFailures = config.getConfiguredInstance(AnomalyDetectorConfig.DISK_FAILURES_CLASS_CONFIG, DiskFailures.class, overrides);
    AnomalyNotificationResult diskFailureResult = notifier.onDiskFailure(diskFailures);
    assertEquals(AnomalyNotificationResult.Action.IGNORE, diskFailureResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.DISK_FAILURE));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.DISK_FAILURE));

    // Step 5: topic anomaly (constructed directly, then configured with the same overrides).
    notifier.resetAlert(KafkaAnomalyType.TOPIC_ANOMALY);
    TopicReplicationFactorAnomaly replicationFactorAnomaly = new TopicReplicationFactorAnomaly();
    replicationFactorAnomaly.configure(overrides);
    AnomalyNotificationResult topicAnomalyResult = notifier.onTopicAnomaly(replicationFactorAnomaly);
    assertEquals(AnomalyNotificationResult.Action.IGNORE, topicAnomalyResult.action());
    assertTrue(notifier.isAlertCalledFor(KafkaAnomalyType.TOPIC_ANOMALY));
    assertFalse(notifier.isAutoFixTriggeredFor(KafkaAnomalyType.TOPIC_ANOMALY));

    EasyMock.verify(cruiseControl);
}
Also used : KafkaCruiseControl(com.linkedin.kafka.cruisecontrol.KafkaCruiseControl) MockTime(org.apache.kafka.common.utils.MockTime) Time(org.apache.kafka.common.utils.Time) BrokerEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity) Properties(java.util.Properties) KafkaCruiseControlConfig(com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig) BrokerFailures(com.linkedin.kafka.cruisecontrol.detector.BrokerFailures) TopicReplicationFactorAnomaly(com.linkedin.kafka.cruisecontrol.detector.TopicReplicationFactorAnomaly) MockTime(org.apache.kafka.common.utils.MockTime) Test(org.junit.Test)

Example 9 with BrokerEntity

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity in project cruise-control by linkedin.

In the class ConcurrencyAdjusterTest, the method testWithinConcurrencyAdjusterLimit:

/**
 * Exercises {@code ExecutionUtils.withinConcurrencyAdjusterLimit} in three scenarios: all brokers
 * populated with {@code populateMetricValues(0)} (expected within the limit), one extra broker populated
 * with {@code populateMetricValues(1)} (expected over the limit), and one broker mapped to {@code null}
 * metrics (expected over the limit).
 */
@Test
public void testWithinConcurrencyAdjusterLimit() {
    // Scenario 1: every broker is under the limit.
    List<Map<Short, Double>> perBrokerMetricValues = new ArrayList<>(NUM_BROKERS);
    for (int brokerIndex = 0; brokerIndex < NUM_BROKERS; brokerIndex++) {
        Map<Short, Double> valuesUnderLimit = populateMetricValues(0);
        perBrokerMetricValues.add(valuesUnderLimit);
    }
    Map<BrokerEntity, ValuesAndExtrapolations> allUnderLimit = createCurrentMetrics(perBrokerMetricValues);
    boolean withinLimit = ExecutionUtils.withinConcurrencyAdjusterLimit(allUnderLimit);
    assertTrue(withinLimit);

    // Scenario 2: one additional broker has just one metric over the limit.
    Map<Short, Double> valuesOverLimit = populateMetricValues(1);
    perBrokerMetricValues.add(valuesOverLimit);
    Map<BrokerEntity, ValuesAndExtrapolations> oneOverLimit = createCurrentMetrics(perBrokerMetricValues);
    boolean withinLimitWithOffender = ExecutionUtils.withinConcurrencyAdjusterLimit(oneOverLimit);
    assertFalse(withinLimitWithOffender);

    // Scenario 3: drop the broker added above, then register a broker that has no metrics at all.
    perBrokerMetricValues.remove(NUM_BROKERS);
    Map<BrokerEntity, ValuesAndExtrapolations> withMissingMetrics = createCurrentMetrics(perBrokerMetricValues);
    BrokerEntity brokerWithoutMetrics = new BrokerEntity(ExecutorTest.class.getSimpleName(), 42);
    withMissingMetrics.put(brokerWithoutMetrics, null);
    boolean withinLimitWithMissing = ExecutionUtils.withinConcurrencyAdjusterLimit(withMissingMetrics);
    assertFalse(withinLimitWithMissing);
}
Also used : ValuesAndExtrapolations(com.linkedin.cruisecontrol.monitor.sampling.aggregator.ValuesAndExtrapolations) ArrayList(java.util.ArrayList) BrokerEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Example 10 with BrokerEntity

use of com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity in project cruise-control by linkedin.

In the class SlowBrokerFinder, the method createSlowBrokerAnomalies:

/**
 * Turns the brokers flagged in this round into slow-broker anomalies, based on each broker's slowness score.
 *
 * <p>A broker whose score equals the decommission score becomes a removal candidate; otherwise a broker
 * whose score is at least the demotion score becomes a demotion candidate. The per-type slow broker counts
 * are refreshed as a side effect. When the number of candidates exceeds
 * {@code clusterSize * _selfHealingUnfixableRatio}, all candidates are reported together in one anomaly;
 * otherwise demotion and removal candidates are reported as separate anomalies (each only if non-empty).
 *
 * @param detectedMetricAnomalies brokers detected as anomalous; each must have an entry in the slowness
 *                                score map.
 * @param clusterSize cluster size that the candidate count is compared against.
 * @return the anomalies to report, possibly empty.
 */
private Set<MetricAnomaly<BrokerEntity>> createSlowBrokerAnomalies(Set<BrokerEntity> detectedMetricAnomalies, int clusterSize) {
    Map<BrokerEntity, Long> demotionCandidates = new HashMap<>();
    Map<BrokerEntity, Long> removalCandidates = new HashMap<>();
    // Sort the flagged brokers into removal/demotion buckets; brokers below both thresholds are not reported.
    for (BrokerEntity slowBroker : detectedMetricAnomalies) {
        int score = _brokerSlownessScore.get(slowBroker);
        boolean reachedDecommissionScore = score == _slowBrokerDecommissionScore;
        if (reachedDecommissionScore || score >= _slowBrokerDemotionScore) {
            Map<BrokerEntity, Long> bucket = reachedDecommissionScore ? removalCandidates : demotionCandidates;
            bucket.put(slowBroker, _detectedSlowBrokers.get(slowBroker));
        }
    }

    // Refresh the number of slow brokers per anomaly type.
    int removalCount = removalCandidates.size();
    int demotionCount = demotionCandidates.size();
    int actionableCount = demotionCount + removalCount;
    _numSlowBrokersByType.put(MetricAnomalyType.PERSISTENT, removalCount);
    _numSlowBrokersByType.put(MetricAnomalyType.RECENT, demotionCount);
    _numSlowBrokersByType.put(MetricAnomalyType.SUSPECT, _detectedSlowBrokers.size() - actionableCount);

    Set<MetricAnomaly<BrokerEntity>> anomalies = new HashSet<>();
    // Too many candidates relative to the cluster size: report them all in a single anomaly.
    // NOTE(review): the two boolean arguments presumably mean "fixable" and "remove slow broker" — confirm
    // against createSlowBrokersAnomaly.
    if (actionableCount > clusterSize * _selfHealingUnfixableRatio) {
        demotionCandidates.putAll(removalCandidates);
        anomalies.add(createSlowBrokersAnomaly(demotionCandidates, false, false));
        return anomalies;
    }

    // Otherwise report the brokers to be demoted and the brokers to be removed separately.
    if (!demotionCandidates.isEmpty()) {
        anomalies.add(createSlowBrokersAnomaly(demotionCandidates, true, false));
    }
    if (!removalCandidates.isEmpty()) {
        anomalies.add(createSlowBrokersAnomaly(removalCandidates, _slowBrokerRemovalEnabled, true));
    }
    return anomalies;
}
Also used : MetricAnomaly(com.linkedin.cruisecontrol.detector.metricanomaly.MetricAnomaly) HashMap(java.util.HashMap) BrokerEntity(com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity) HashSet(java.util.HashSet)

Aggregations

BrokerEntity (com.linkedin.kafka.cruisecontrol.monitor.sampling.holder.BrokerEntity)11 HashMap (java.util.HashMap)8 Map (java.util.Map)7 HashSet (java.util.HashSet)6 ValuesAndExtrapolations (com.linkedin.cruisecontrol.monitor.sampling.aggregator.ValuesAndExtrapolations)5 KafkaCruiseControl (com.linkedin.kafka.cruisecontrol.KafkaCruiseControl)4 ArrayList (java.util.ArrayList)4 KafkaCruiseControlConfig (com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig)3 MetricAnomaly (com.linkedin.cruisecontrol.detector.metricanomaly.MetricAnomaly)2 AggregatedMetricValues (com.linkedin.cruisecontrol.monitor.sampling.aggregator.AggregatedMetricValues)2 List (java.util.List)2 Properties (java.util.Properties)2 Test (org.junit.Test)2 CruiseControlUtils.utcDateFor (com.linkedin.cruisecontrol.CruiseControlUtils.utcDateFor)1 Anomaly (com.linkedin.cruisecontrol.detector.Anomaly)1 MetricAnomalyFinder (com.linkedin.cruisecontrol.detector.metricanomaly.MetricAnomalyFinder)1 MetricAnomalyType (com.linkedin.cruisecontrol.detector.metricanomaly.MetricAnomalyType)1 PercentileMetricAnomalyFinderUtils.isDataSufficient (com.linkedin.cruisecontrol.detector.metricanomaly.PercentileMetricAnomalyFinderUtils.isDataSufficient)1 MetricValues (com.linkedin.cruisecontrol.monitor.sampling.aggregator.MetricValues)1 OptimizerResult (com.linkedin.kafka.cruisecontrol.analyzer.OptimizerResult)1