Search in sources:

Example 1 with AnomalyDetectorManager

use of com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager in project cruise-control by linkedin.

the class ExecutorTest method executeAndVerifyProposals.

/**
 * Runs the given proposals through a freshly built {@link Executor} wired with mocks, waits for the
 * execution to finish, and then verifies the resulting cluster state through the given ZooKeeper client.
 *
 * @param kafkaZkClient client used to read back replicas and leaders after the execution.
 * @param proposalsToExecute proposals handed to the executor.
 * @param proposalsToCheck proposals whose replication factor, replica placement and leader are asserted.
 * @param completeWithError {@code true} to expect an alert from the notifier, {@code false} to expect a notification.
 * @param replicationThrottle throttle value forwarded to {@code executeProposals}; may be {@code null}.
 * @param verifyProgress {@code true} to check the ongoing reassignment of {@code TP0} right after submission.
 * @param isTriggeredByUserRequest {@code true} to also wire (and verify) a mock user task manager.
 * @throws OngoingExecutionException declared because the executor calls made here can throw it.
 */
private void executeAndVerifyProposals(KafkaZkClient kafkaZkClient, Collection<ExecutionProposal> proposalsToExecute, Collection<ExecutionProposal> proposalsToCheck, boolean completeWithError, Long replicationThrottle, boolean verifyProgress, boolean isTriggeredByUserRequest) throws OngoingExecutionException {
    KafkaCruiseControlConfig config = new KafkaCruiseControlConfig(getExecutorProperties());

    // Collaborators of the executor, all mocked.
    UserTaskManager.UserTaskInfo userTaskInfo = getMockUserTaskInfo();
    UserTaskManager userTaskManager = null;
    if (isTriggeredByUserRequest) {
        userTaskManager = getMockUserTaskManager(RANDOM_UUID, userTaskInfo, Collections.singletonList(completeWithError));
    }
    ExecutorNotifier notifier = EasyMock.mock(ExecutorNotifier.class);
    LoadMonitor loadMonitor = getMockLoadMonitor();
    Capture<String> sentMessage = Capture.newInstance(CaptureType.FIRST);
    AnomalyDetectorManager anomalyDetectorManager = getMockAnomalyDetector(RANDOM_UUID);

    // A clean finish is reported via sendNotification, a failed one via sendAlert; capture whichever is sent.
    if (!completeWithError) {
        notifier.sendNotification(EasyMock.capture(sentMessage));
    } else {
        notifier.sendAlert(EasyMock.capture(sentMessage));
    }

    // The user task manager exists (and hence is replayed/verified) only for user-triggered executions.
    Object[] mocks = isTriggeredByUserRequest
        ? new Object[] {userTaskInfo, userTaskManager, notifier, loadMonitor, anomalyDetectorManager}
        : new Object[] {userTaskInfo, notifier, loadMonitor, anomalyDetectorManager};
    EasyMock.replay(mocks);

    Executor executor = new Executor(config, new SystemTime(), new MetricRegistry(), null, notifier, anomalyDetectorManager);
    executor.setUserTaskManager(userTaskManager);

    // Remember the replication factor each checked partition had before the execution.
    Map<TopicPartition, Integer> replicationFactorByPartition = new HashMap<>();
    for (ExecutionProposal toCheck : proposalsToCheck) {
        replicationFactorByPartition.put(new TopicPartition(toCheck.topic(), toCheck.partitionId()), toCheck.oldReplicas().size());
    }

    executor.setGeneratingProposalsForExecution(RANDOM_UUID, ExecutorTest.class::getSimpleName, isTriggeredByUserRequest);
    executor.executeProposals(proposalsToExecute, Collections.emptySet(), null, loadMonitor, null, null, null, null, null, null, replicationThrottle, isTriggeredByUserRequest, RANDOM_UUID, false, false);
    if (verifyProgress) {
        verifyOngoingPartitionReassignments(Collections.singleton(TP0));
    }
    waitUntilTrue(() -> (!executor.hasOngoingExecution() && executor.state().state() == ExecutorState.State.NO_TASK_IN_PROGRESS),
                  "Proposal execution did not finish within the time limit", EXECUTION_DEADLINE_MS, EXECUTION_REGULAR_CHECK_MS);

    // The message sent once the execution has finished must mention the execution UUID.
    String sent = sentMessage.getValue();
    assertTrue(sent.contains(RANDOM_UUID));

    for (ExecutionProposal toCheck : proposalsToCheck) {
        TopicPartition partition = new TopicPartition(toCheck.topic(), toCheck.partitionId());
        int replicationFactorBefore = replicationFactorByPartition.get(partition);
        assertEquals("Replication factor for partition " + partition + " should be " + replicationFactorBefore,
                     replicationFactorBefore, kafkaZkClient.getReplicasForPartition(partition).size());
        if (toCheck.hasReplicaAction()) {
            for (ReplicaPlacementInfo newReplica : toCheck.newReplicas()) {
                assertTrue("The partition should have moved for " + partition,
                           kafkaZkClient.getReplicasForPartition(partition).contains(newReplica.brokerId()));
            }
        }
        assertEquals("The leader should have moved for " + partition,
                     toCheck.newLeader().brokerId(), kafkaZkClient.getLeaderForPartition(partition).get());
    }

    EasyMock.verify(mocks);
}
Also used : HashMap(java.util.HashMap) MetricRegistry(com.codahale.metrics.MetricRegistry) AnomalyDetectorManager(com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager) TopicPartition(org.apache.kafka.common.TopicPartition) KafkaCruiseControlConfig(com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig) UserTaskManager(com.linkedin.kafka.cruisecontrol.servlet.UserTaskManager) LoadMonitor(com.linkedin.kafka.cruisecontrol.monitor.LoadMonitor) ReplicaPlacementInfo(com.linkedin.kafka.cruisecontrol.model.ReplicaPlacementInfo) SystemTime(org.apache.kafka.common.utils.SystemTime)

Example 2 with AnomalyDetectorManager

use of com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager in project cruise-control by linkedin.

the class ExecutorTest method getMockAnomalyDetector.

/**
 * Builds an {@link AnomalyDetectorManager} mock that tolerates any number of calls to
 * {@code maybeClearOngoingAnomalyDetectionTimeMs()}, {@code resetHasUnfixableGoals()} and
 * {@code markSelfHealingFinished(anomalyId)}. The mock is returned in record state; the caller replays it.
 *
 * @param anomalyId the id expected as the argument of {@code markSelfHealingFinished}.
 * @return the recorded, not yet replayed, mock.
 */
private static AnomalyDetectorManager getMockAnomalyDetector(String anomalyId) {
    AnomalyDetectorManager detectorManager = EasyMock.mock(AnomalyDetectorManager.class);

    // Void methods are recorded by invoking them and then qualifying the last call.
    detectorManager.maybeClearOngoingAnomalyDetectionTimeMs();
    EasyMock.expectLastCall().anyTimes();

    detectorManager.resetHasUnfixableGoals();
    EasyMock.expectLastCall().anyTimes();

    detectorManager.markSelfHealingFinished(anomalyId);
    EasyMock.expectLastCall().anyTimes();

    return detectorManager;
}
Also used : AnomalyDetectorManager(com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager)

Example 3 with AnomalyDetectorManager

use of com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager in project cruise-control by linkedin.

the class KafkaCruiseControlTest method testSanityCheckDryRun.

/**
 * Exercises {@code sanityCheckDryRun(dryRun, ...)} of {@code KafkaCruiseControl} against a strict mock
 * {@code Executor}, covering: dry runs, an ongoing Cruise Control execution (with and without a stop request),
 * an ongoing external reassignment, no ongoing work at all, and a timeout while listing reassignments.
 * Finally checks that the admin client of the instance is usable.
 *
 * <p>The executor is a strict mock, so the expectations recorded below must be consumed in exactly the order
 * in which the numbered scenarios (#1 - #5) are invoked further down.
 * NOTE(review): the second argument of {@code sanityCheckDryRun} appears to be the "stop ongoing execution"
 * flag, judging by the scenario comments -- confirm against the method signature.
 */
@Test
public void testSanityCheckDryRun() throws InterruptedException, ExecutionException, TimeoutException {
    // All collaborators are mocks; only the executor has recorded expectations.
    Time time = EasyMock.mock(Time.class);
    AnomalyDetectorManager anomalyDetectorManager = EasyMock.mock(AnomalyDetectorManager.class);
    Executor executor = EasyMock.strictMock(Executor.class);
    LoadMonitor loadMonitor = EasyMock.mock(LoadMonitor.class);
    ExecutorService goalOptimizerExecutor = EasyMock.mock(ExecutorService.class);
    GoalOptimizer goalOptimizer = EasyMock.mock(GoalOptimizer.class);
    // For sanityCheckDryRun(false, true) and sanityCheckDryRun(false, false) (see #1 and #2 below).
    EasyMock.expect(executor.hasOngoingExecution()).andReturn(true).times(2);
    // For sanityCheckDryRun(false, XXX) (see #3 below): no CC execution, but an external reassignment is ongoing,
    // and the external agent is reported as stopped.
    EasyMock.expect(executor.hasOngoingExecution()).andReturn(false).once();
    EasyMock.expect(executor.listPartitionsBeingReassigned()).andReturn(DUMMY_ONGOING_PARTITION_REASSIGNMENTS);
    EasyMock.expect(executor.maybeStopExternalAgent()).andReturn(true);
    // For sanityCheckDryRun(false, XXX) (see #4 below): nothing is ongoing.
    EasyMock.expect(executor.hasOngoingExecution()).andReturn(false).once();
    EasyMock.expect(executor.listPartitionsBeingReassigned()).andReturn(Collections.emptySet());
    // For sanityCheckDryRun(false, XXX) (see #5 below): listing the ongoing reassignments times out.
    EasyMock.expect(executor.hasOngoingExecution()).andReturn(false).once();
    EasyMock.expect(executor.listPartitionsBeingReassigned()).andThrow(new TimeoutException()).once();
    EasyMock.replay(time, anomalyDetectorManager, executor, loadMonitor, goalOptimizerExecutor, goalOptimizer);
    KafkaCruiseControl kafkaCruiseControl = new KafkaCruiseControl(_config, time, anomalyDetectorManager, executor, loadMonitor, goalOptimizerExecutor, goalOptimizer, new NoopProvisioner());
    // Expect no failure (dryrun = true) regardless of ongoing executions.
    // No executor expectation is recorded for these two calls.
    kafkaCruiseControl.sanityCheckDryRun(true, false);
    kafkaCruiseControl.sanityCheckDryRun(true, true);
    // 1. Expect no failure (dryrun = false), if there is ongoing execution started by CC, it must be requested to stop.
    kafkaCruiseControl.sanityCheckDryRun(false, true);
    // 2. Expect failure (dryrun = false), if there is ongoing execution started by CC, not requested to stop.
    assertThrows(IllegalStateException.class, () -> kafkaCruiseControl.sanityCheckDryRun(false, false));
    // 3. Expect no failure (dryrun = false), there is no execution started by CC, but ongoing replica reassignment, request to stop is irrelevant.
    kafkaCruiseControl.sanityCheckDryRun(false, false);
    // 4. Expect no failure (dryrun = false), there is no execution started by CC or other tools, request to stop is irrelevant.
    kafkaCruiseControl.sanityCheckDryRun(false, false);
    // 5. Expect failure (dryrun = false), there is no execution started by CC, but checking ongoing executions started
    // by other tools timed out, request to stop is irrelevant.
    assertThrows(IllegalStateException.class, () -> kafkaCruiseControl.sanityCheckDryRun(false, false));
    // Every recorded executor expectation must have been consumed by the scenarios above.
    EasyMock.verify(time, anomalyDetectorManager, executor, loadMonitor, goalOptimizerExecutor, goalOptimizer);
    // Verify initialization and functioning of Admin Client
    AdminClient adminClient = kafkaCruiseControl.adminClient();
    assertNotNull(adminClient);
    // The admin client must report as many nodes as the test cluster has.
    assertEquals(clusterSize(), adminClient.describeCluster().nodes().get(CLIENT_REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS).size());
}
Also used : Executor(com.linkedin.kafka.cruisecontrol.executor.Executor) AnomalyDetectorManager(com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager) GoalOptimizer(com.linkedin.kafka.cruisecontrol.analyzer.GoalOptimizer) NoopProvisioner(com.linkedin.kafka.cruisecontrol.detector.NoopProvisioner) ExecutorService(java.util.concurrent.ExecutorService) Time(org.apache.kafka.common.utils.Time) LoadMonitor(com.linkedin.kafka.cruisecontrol.monitor.LoadMonitor) TimeoutException(java.util.concurrent.TimeoutException) AdminClient(org.apache.kafka.clients.admin.AdminClient) Test(org.junit.Test)

Example 4 with AnomalyDetectorManager

use of com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager in project cruise-control by linkedin.

the class ExecutorTest method testTimeoutAndExecutionStop.

/**
 * Runs two consecutive executions on the same {@code Executor}:
 * <ol>
 *   <li>a leader-only movement that can never be observed as complete (the mocked metadata never changes),
 *       so it has to end once the mock clock is advanced past the leader movement timeout;</li>
 *   <li>an inter-broker replica movement that is stopped via {@code userTriggeredStopExecution}.</li>
 * </ol>
 * Between the two, it checks that {@code executeProposals} rejects a UUID that differs from the one
 * registered through {@code setGeneratingProposalsForExecution}.
 */
@Test
public void testTimeoutAndExecutionStop() throws InterruptedException, OngoingExecutionException {
    createTopics(0);
    // The proposal tries to move the leader. We fake the replica list to be unchanged so there is no replica
    // movement, but only leader movement.
    ExecutionProposal proposal = new ExecutionProposal(TP1, 0, new ReplicaPlacementInfo(1), Arrays.asList(new ReplicaPlacementInfo(0), new ReplicaPlacementInfo(1)), Arrays.asList(new ReplicaPlacementInfo(0), new ReplicaPlacementInfo(1)));
    KafkaCruiseControlConfig configs = new KafkaCruiseControlConfig(getExecutorProperties());
    // Mock clock: the test advances it explicitly (see time.sleep below) to trigger the timeout.
    Time time = new MockTime();
    MetadataClient mockMetadataClient = EasyMock.mock(MetadataClient.class);
    // Fake the metadata to never change so the leader movement will timeout.
    Node node0 = new Node(0, "host0", 100);
    Node node1 = new Node(1, "host1", 100);
    Node[] replicas = new Node[2];
    replicas[0] = node0;
    replicas[1] = node1;
    // The same two-node array is used for both array arguments of PartitionInfo; node1 is passed as the leader.
    PartitionInfo partitionInfo = new PartitionInfo(TP1.topic(), TP1.partition(), node1, replicas, replicas);
    Cluster cluster = new Cluster("id", Arrays.asList(node0, node1), Collections.singleton(partitionInfo), Collections.emptySet(), Collections.emptySet());
    MetadataClient.ClusterAndGeneration clusterAndGeneration = new MetadataClient.ClusterAndGeneration(cluster, 0);
    // Every metadata refresh/lookup returns the same frozen cluster view.
    EasyMock.expect(mockMetadataClient.refreshMetadata()).andReturn(clusterAndGeneration).anyTimes();
    EasyMock.expect(mockMetadataClient.cluster()).andReturn(clusterAndGeneration.cluster()).anyTimes();
    LoadMonitor mockLoadMonitor = getMockLoadMonitor();
    AnomalyDetectorManager mockAnomalyDetectorManager = getMockAnomalyDetector(RANDOM_UUID);
    UserTaskManager.UserTaskInfo mockUserTaskInfo = getMockUserTaskInfo();
    // This test runs two consecutive executions. The first one completes w/o error, but the second one with error.
    UserTaskManager mockUserTaskManager = getMockUserTaskManager(RANDOM_UUID, mockUserTaskInfo, Arrays.asList(false, true));
    EasyMock.replay(mockMetadataClient, mockLoadMonitor, mockAnomalyDetectorManager, mockUserTaskInfo, mockUserTaskManager);
    Collection<ExecutionProposal> proposalsToExecute = Collections.singletonList(proposal);
    // NOTE(review): the fifth constructor argument (null here) looks like the executor notifier -- confirm.
    Executor executor = new Executor(configs, time, new MetricRegistry(), mockMetadataClient, null, mockAnomalyDetectorManager);
    executor.setUserTaskManager(mockUserTaskManager);
    // First execution: leader movement only.
    executor.setGeneratingProposalsForExecution(RANDOM_UUID, ExecutorTest.class::getSimpleName, true);
    executor.executeProposals(proposalsToExecute, Collections.emptySet(), null, mockLoadMonitor, null, null, null, null, null, null, null, true, RANDOM_UUID, false, false);
    // Wait until the leader movement task has actually been picked up before advancing the clock.
    waitUntilTrue(() -> (executor.state().state() == ExecutorState.State.LEADER_MOVEMENT_TASK_IN_PROGRESS && !executor.inExecutionTasks().isEmpty()), "Leader movement task did not start within the time limit", EXECUTION_DEADLINE_MS, EXECUTION_SHORT_CHECK_MS);
    // Sleep over ExecutorConfig#DEFAULT_LEADER_MOVEMENT_TIMEOUT_MS with some margin for inter-thread synchronization.
    time.sleep(ExecutorConfig.DEFAULT_LEADER_MOVEMENT_TIMEOUT_MS + 1L);
    // The execution should finish.
    waitUntilTrue(() -> (!executor.hasOngoingExecution() && executor.state().state() == ExecutorState.State.NO_TASK_IN_PROGRESS), "Proposal execution did not finish within the time limit", EXECUTION_DEADLINE_MS, EXECUTION_REGULAR_CHECK_MS);
    // Second execution: the proposal tries to move replicas (the new replica list is the old one reversed).
    proposal = new ExecutionProposal(TP1, 0, new ReplicaPlacementInfo(1), Arrays.asList(new ReplicaPlacementInfo(0), new ReplicaPlacementInfo(1)), Arrays.asList(new ReplicaPlacementInfo(1), new ReplicaPlacementInfo(0)));
    // Separate variable because 'proposal' was reassigned and the lambda below needs an effectively final capture.
    Collection<ExecutionProposal> newProposalsToExecute = Collections.singletonList(proposal);
    // Expect exception in case of UUID mismatch between UNKNOWN_UUID and RANDOM_UUID.
    executor.setGeneratingProposalsForExecution(UNKNOWN_UUID, ExecutorTest.class::getSimpleName, true);
    assertThrows(IllegalStateException.class, () -> executor.executeProposals(newProposalsToExecute, Collections.emptySet(), null, mockLoadMonitor, null, null, null, null, null, null, null, true, RANDOM_UUID, false, false));
    // Clear the UNKNOWN_UUID proposal-generation state so a new execution can be registered.
    executor.failGeneratingProposalsForExecution(UNKNOWN_UUID);
    // Now successfully start the execution.
    executor.setGeneratingProposalsForExecution(RANDOM_UUID, ExecutorTest.class::getSimpleName, true);
    executor.executeProposals(newProposalsToExecute, Collections.emptySet(), null, mockLoadMonitor, null, null, null, null, null, null, null, true, RANDOM_UUID, false, false);
    waitUntilTrue(() -> (executor.state().state() == ExecutorState.State.INTER_BROKER_REPLICA_MOVEMENT_TASK_IN_PROGRESS), "Inter-broker replica movement task did not start within the time limit", EXECUTION_DEADLINE_MS, EXECUTION_SHORT_CHECK_MS);
    // Stop execution.
    executor.userTriggeredStopExecution(false);
    // The execution should finish.
    waitUntilTrue(() -> (!executor.hasOngoingExecution() && executor.state().state() == ExecutorState.State.NO_TASK_IN_PROGRESS), "Proposal execution did not finish within the time limit", EXECUTION_DEADLINE_MS, EXECUTION_REGULAR_CHECK_MS);
    EasyMock.verify(mockMetadataClient, mockLoadMonitor, mockAnomalyDetectorManager, mockUserTaskInfo, mockUserTaskManager);
}
Also used : Node(org.apache.kafka.common.Node) MetricRegistry(com.codahale.metrics.MetricRegistry) Cluster(org.apache.kafka.common.Cluster) MockTime(org.apache.kafka.common.utils.MockTime) Time(org.apache.kafka.common.utils.Time) SystemTime(org.apache.kafka.common.utils.SystemTime) MetadataClient(com.linkedin.kafka.cruisecontrol.common.MetadataClient) AnomalyDetectorManager(com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager) KafkaCruiseControlConfig(com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig) UserTaskManager(com.linkedin.kafka.cruisecontrol.servlet.UserTaskManager) PartitionInfo(org.apache.kafka.common.PartitionInfo) ReplicaPlacementInfo(com.linkedin.kafka.cruisecontrol.model.ReplicaPlacementInfo) LoadMonitor(com.linkedin.kafka.cruisecontrol.monitor.LoadMonitor) Test(org.junit.Test)

Example 5 with AnomalyDetectorManager

use of com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager in project cruise-control by linkedin.

the class KafkaCruiseControlTest method testDisableAutoStopExternalAgent.

/**
 * With {@code ExecutorConfig.AUTO_STOP_EXTERNAL_AGENT_CONFIG} set to {@code false}, a non-dry-run sanity
 * check must fail when the executor reports no ongoing execution of its own but does report partitions
 * being reassigned. Also checks that the admin client of the created instance is usable.
 */
@Test
public void testDisableAutoStopExternalAgent() throws InterruptedException, ExecutionException, TimeoutException {
    // Configuration for this test: default unit-test properties pointed at the test cluster,
    // with automatic stopping of external agents switched off.
    Properties props = KafkaCruiseControlUnitTestUtils.getKafkaCruiseControlProperties();
    props.put(MonitorConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers());
    props.put(ExecutorConfig.ZOOKEEPER_CONNECT_CONFIG, zkConnect());
    props.put(ExecutorConfig.AUTO_STOP_EXTERNAL_AGENT_CONFIG, false);
    props.put(KafkaSampleStore.PARTITION_METRIC_SAMPLE_STORE_TOPIC_CONFIG, "__partition_samples");
    props.put(KafkaSampleStore.BROKER_METRIC_SAMPLE_STORE_TOPIC_CONFIG, "__broker_samples");
    _config = new KafkaCruiseControlConfig(props);

    // Collaborators: only the (strict) executor mock carries expectations.
    Time mockTime = EasyMock.mock(Time.class);
    AnomalyDetectorManager mockAnomalyDetectorManager = EasyMock.mock(AnomalyDetectorManager.class);
    Executor mockExecutor = EasyMock.strictMock(Executor.class);
    LoadMonitor mockLoadMonitor = EasyMock.mock(LoadMonitor.class);
    ExecutorService mockOptimizerExecutor = EasyMock.mock(ExecutorService.class);
    GoalOptimizer mockGoalOptimizer = EasyMock.mock(GoalOptimizer.class);
    Object[] mocks = {mockTime, mockAnomalyDetectorManager, mockExecutor, mockLoadMonitor, mockOptimizerExecutor, mockGoalOptimizer};

    // No execution started by CC, but a reassignment is in flight.
    EasyMock.expect(mockExecutor.hasOngoingExecution()).andReturn(false).once();
    EasyMock.expect(mockExecutor.listPartitionsBeingReassigned()).andReturn(DUMMY_ONGOING_PARTITION_REASSIGNMENTS);
    EasyMock.replay(mocks);

    KafkaCruiseControl cruiseControl = new KafkaCruiseControl(_config, mockTime, mockAnomalyDetectorManager, mockExecutor,
                                                              mockLoadMonitor, mockOptimizerExecutor, mockGoalOptimizer,
                                                              new NoopProvisioner());

    // Expect failure (dryrun = false): the ongoing replica reassignment blocks the request,
    // and whether a stop was requested is irrelevant.
    assertThrows(IllegalStateException.class, () -> cruiseControl.sanityCheckDryRun(false, false));
    EasyMock.verify(mocks);

    // The admin client must be initialized and able to describe the test cluster.
    AdminClient client = cruiseControl.adminClient();
    assertNotNull(client);
    assertEquals(clusterSize(),
                 client.describeCluster().nodes().get(CLIENT_REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS).size());
}
Also used : Executor(com.linkedin.kafka.cruisecontrol.executor.Executor) AnomalyDetectorManager(com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager) GoalOptimizer(com.linkedin.kafka.cruisecontrol.analyzer.GoalOptimizer) NoopProvisioner(com.linkedin.kafka.cruisecontrol.detector.NoopProvisioner) ExecutorService(java.util.concurrent.ExecutorService) KafkaCruiseControlConfig(com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig) Time(org.apache.kafka.common.utils.Time) Properties(java.util.Properties) LoadMonitor(com.linkedin.kafka.cruisecontrol.monitor.LoadMonitor) AdminClient(org.apache.kafka.clients.admin.AdminClient) Test(org.junit.Test)

Aggregations

AnomalyDetectorManager (com.linkedin.kafka.cruisecontrol.detector.AnomalyDetectorManager): 5, LoadMonitor (com.linkedin.kafka.cruisecontrol.monitor.LoadMonitor): 4, KafkaCruiseControlConfig (com.linkedin.kafka.cruisecontrol.config.KafkaCruiseControlConfig): 3, Time (org.apache.kafka.common.utils.Time): 3, Test (org.junit.Test): 3, MetricRegistry (com.codahale.metrics.MetricRegistry): 2, GoalOptimizer (com.linkedin.kafka.cruisecontrol.analyzer.GoalOptimizer): 2, NoopProvisioner (com.linkedin.kafka.cruisecontrol.detector.NoopProvisioner): 2, Executor (com.linkedin.kafka.cruisecontrol.executor.Executor): 2, ReplicaPlacementInfo (com.linkedin.kafka.cruisecontrol.model.ReplicaPlacementInfo): 2, UserTaskManager (com.linkedin.kafka.cruisecontrol.servlet.UserTaskManager): 2, ExecutorService (java.util.concurrent.ExecutorService): 2, AdminClient (org.apache.kafka.clients.admin.AdminClient): 2, SystemTime (org.apache.kafka.common.utils.SystemTime): 2, MetadataClient (com.linkedin.kafka.cruisecontrol.common.MetadataClient): 1, HashMap (java.util.HashMap): 1, Properties (java.util.Properties): 1, TimeoutException (java.util.concurrent.TimeoutException): 1, Cluster (org.apache.kafka.common.Cluster): 1, Node (org.apache.kafka.common.Node): 1