Search in sources :

Example 6 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testNoInitialStateWithAutoscaler.

@Test
public void testNoInitialStateWithAutoscaler() throws Exception {
    KafkaIndexTaskClientFactory taskClientFactory = new KafkaIndexTaskClientFactory(null, null) {

        @Override
        public KafkaIndexTaskClient build(TaskInfoProvider taskInfoProvider, String dataSource, int numThreads, Duration httpTimeout, long numRetries) {
            Assert.assertEquals(TEST_CHAT_THREADS, numThreads);
            Assert.assertEquals(TEST_HTTP_TIMEOUT.toStandardDuration(), httpTimeout);
            Assert.assertEquals(TEST_CHAT_RETRIES, numRetries);
            return taskClient;
        }
    };
    HashMap<String, Object> autoScalerConfig = new HashMap<>();
    autoScalerConfig.put("enableTaskAutoScaler", true);
    autoScalerConfig.put("lagCollectionIntervalMillis", 500);
    autoScalerConfig.put("lagCollectionRangeMillis", 500);
    autoScalerConfig.put("scaleOutThreshold", 0);
    autoScalerConfig.put("triggerScaleOutFractionThreshold", 0.0);
    autoScalerConfig.put("scaleInThreshold", 1000000);
    autoScalerConfig.put("triggerScaleInFractionThreshold", 0.8);
    autoScalerConfig.put("scaleActionStartDelayMillis", 0);
    autoScalerConfig.put("scaleActionPeriodMillis", 100);
    autoScalerConfig.put("taskCountMax", 2);
    autoScalerConfig.put("taskCountMin", 1);
    autoScalerConfig.put("scaleInStep", 1);
    autoScalerConfig.put("scaleOutStep", 2);
    autoScalerConfig.put("minTriggerScaleActionFrequencyMillis", 1200000);
    final Map<String, Object> consumerProperties = KafkaConsumerConfigs.getConsumerProperties();
    consumerProperties.put("myCustomKey", "myCustomValue");
    consumerProperties.put("bootstrap.servers", kafkaHost);
    KafkaSupervisorIOConfig kafkaSupervisorIOConfig = new KafkaSupervisorIOConfig(topic, INPUT_FORMAT, 1, 1, new Period("PT1H"), consumerProperties, OBJECT_MAPPER.convertValue(autoScalerConfig, LagBasedAutoScalerConfig.class), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, new Period("P1D"), new Period("PT30S"), true, new Period("PT30M"), null, null, null);
    final KafkaSupervisorTuningConfig tuningConfigOri = new KafkaSupervisorTuningConfig(null, 1000, null, null, 50000, null, new Period("P1Y"), new File("/test"), null, null, null, false, null, false, null, numThreads, TEST_CHAT_THREADS, TEST_CHAT_RETRIES, TEST_HTTP_TIMEOUT, TEST_SHUTDOWN_TIMEOUT, null, null, null, null, null);
    EasyMock.expect(ingestionSchema.getIOConfig()).andReturn(kafkaSupervisorIOConfig).anyTimes();
    EasyMock.expect(ingestionSchema.getDataSchema()).andReturn(dataSchema).anyTimes();
    EasyMock.expect(ingestionSchema.getTuningConfig()).andReturn(tuningConfigOri).anyTimes();
    EasyMock.replay(ingestionSchema);
    SeekableStreamSupervisorSpec testableSupervisorSpec = new KafkaSupervisorSpec(ingestionSchema, dataSchema, tuningConfigOri, kafkaSupervisorIOConfig, null, false, taskStorage, taskMaster, indexerMetadataStorageCoordinator, taskClientFactory, OBJECT_MAPPER, new NoopServiceEmitter(), new DruidMonitorSchedulerConfig(), rowIngestionMetersFactory, new SupervisorStateManagerConfig());
    supervisor = new TestableKafkaSupervisor(taskStorage, taskMaster, indexerMetadataStorageCoordinator, taskClientFactory, OBJECT_MAPPER, (KafkaSupervisorSpec) testableSupervisorSpec, rowIngestionMetersFactory);
    SupervisorTaskAutoScaler autoscaler = testableSupervisorSpec.createAutoscaler(supervisor);
    final KafkaSupervisorTuningConfig tuningConfig = supervisor.getTuningConfig();
    addSomeEvents(1);
    Capture<KafkaIndexTask> captured = Capture.newInstance();
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.absent()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true);
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    replayAll();
    supervisor.start();
    int taskCountBeforeScale = supervisor.getIoConfig().getTaskCount();
    Assert.assertEquals(1, taskCountBeforeScale);
    autoscaler.start();
    supervisor.runInternal();
    Thread.sleep(1 * 1000);
    verifyAll();
    int taskCountAfterScale = supervisor.getIoConfig().getTaskCount();
    Assert.assertEquals(2, taskCountAfterScale);
    KafkaIndexTask task = captured.getValue();
    Assert.assertEquals(KafkaSupervisorTest.dataSchema, task.getDataSchema());
    Assert.assertEquals(tuningConfig.convertToTaskTuningConfig(), task.getTuningConfig());
    KafkaIndexTaskIOConfig taskConfig = task.getIOConfig();
    Assert.assertEquals(kafkaHost, taskConfig.getConsumerProperties().get("bootstrap.servers"));
    Assert.assertEquals("myCustomValue", taskConfig.getConsumerProperties().get("myCustomKey"));
    Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
    Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
    Assert.assertFalse("minimumMessageTime", taskConfig.getMinimumMessageTime().isPresent());
    Assert.assertFalse("maximumMessageTime", taskConfig.getMaximumMessageTime().isPresent());
    Assert.assertEquals(topic, taskConfig.getStartSequenceNumbers().getStream());
    Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0));
    Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(1));
    Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2));
    Assert.assertEquals(topic, taskConfig.getEndSequenceNumbers().getStream());
    Assert.assertEquals(Long.MAX_VALUE, (long) taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(0));
    Assert.assertEquals(Long.MAX_VALUE, (long) taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(1));
    Assert.assertEquals(Long.MAX_VALUE, (long) taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(2));
    autoscaler.reset();
    autoscaler.stop();
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) HashMap(java.util.HashMap) SeekableStreamSupervisorSpec(org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorSpec) SupervisorTaskAutoScaler(org.apache.druid.indexing.overlord.supervisor.autoscaler.SupervisorTaskAutoScaler) KafkaIndexTaskIOConfig(org.apache.druid.indexing.kafka.KafkaIndexTaskIOConfig) Executor(java.util.concurrent.Executor) SupervisorStateManagerConfig(org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig) Period(org.joda.time.Period) Duration(org.joda.time.Duration) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) DruidMonitorSchedulerConfig(org.apache.druid.server.metrics.DruidMonitorSchedulerConfig) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) KafkaIndexTaskClientFactory(org.apache.druid.indexing.kafka.KafkaIndexTaskClientFactory) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) TaskInfoProvider(org.apache.druid.indexing.common.TaskInfoProvider) LagBasedAutoScalerConfig(org.apache.druid.indexing.seekablestream.supervisor.autoscaler.LagBasedAutoScalerConfig) File(java.io.File) Test(org.junit.Test)

Example 7 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testReplicas.

@Test
public void testReplicas() throws Exception {
    supervisor = getTestableSupervisor(2, 1, true, "PT1H", null, null);
    addSomeEvents(1);
    Capture<KafkaIndexTask> captured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(2);
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    KafkaIndexTask task1 = captured.getValues().get(0);
    Assert.assertEquals(3, task1.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().size());
    Assert.assertEquals(3, task1.getIOConfig().getEndSequenceNumbers().getPartitionSequenceNumberMap().size());
    Assert.assertEquals(0L, task1.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0).longValue());
    Assert.assertEquals(0L, task1.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(1).longValue());
    Assert.assertEquals(0L, task1.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2).longValue());
    KafkaIndexTask task2 = captured.getValues().get(1);
    Assert.assertEquals(3, task2.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().size());
    Assert.assertEquals(3, task2.getIOConfig().getEndSequenceNumbers().getPartitionSequenceNumberMap().size());
    Assert.assertEquals(0L, task2.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0).longValue());
    Assert.assertEquals(0L, task2.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(1).longValue());
    Assert.assertEquals(0L, task2.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2).longValue());
}
Also used : KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Test(org.junit.Test)

Example 8 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testGetOffsetFromStorageForPartitionWithResetOffsetAutomatically.

@Test
public void testGetOffsetFromStorageForPartitionWithResetOffsetAutomatically() throws Exception {
    addSomeEvents(2);
    supervisor = getTestableSupervisor(1, 1, true, true, "PT1H", null, null);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    EasyMock.reset(indexerMetadataStorageCoordinator);
    // unknown DataSourceMetadata in metadata store
    // for simplicity in testing the offset availability check, we use negative stored offsets in metadata here,
    // because the stream's earliest offset is 0, although that would not happen in real usage.
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(1, -100L, 2, 200L)))).times(4);
    // getOffsetFromStorageForPartition() throws an exception when the offsets are automatically reset.
    // Since getOffsetFromStorageForPartition() is called per partition, all partitions can't be reset at the same time.
    // Instead, subsequent partitions will be reset in the following supervisor runs.
    EasyMock.expect(indexerMetadataStorageCoordinator.resetDataSourceMetadata(DATASOURCE, new KafkaDataSourceMetadata(// Only one partition is reset in a single supervisor run.
    new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(2, 200L))))).andReturn(true);
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) Executor(java.util.concurrent.Executor) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test)

Example 9 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testResetNoDataSourceMetadata.

@Test
public void testResetNoDataSourceMetadata() {
    supervisor = getTestableSupervisor(1, 1, true, "PT1H", null, null);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    KafkaDataSourceMetadata resetMetadata = new KafkaDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(1, 1000L, 2, 1000L), ImmutableSet.of()));
    EasyMock.reset(indexerMetadataStorageCoordinator);
    // no DataSourceMetadata in metadata store
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(null);
    EasyMock.replay(indexerMetadataStorageCoordinator);
    supervisor.resetInternal(resetMetadata);
    verifyAll();
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) Executor(java.util.concurrent.Executor) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Test(org.junit.Test)

Example 10 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testQueueNextTasksOnSuccess.

@Test
public void testQueueNextTasksOnSuccess() throws Exception {
    supervisor = getTestableSupervisor(2, 2, true, "PT1H", null, null);
    addSomeEvents(1);
    Capture<Task> captured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(Status.NOT_STARTED)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(4);
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    List<Task> tasks = captured.getValues();
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskClient);
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(Status.NOT_STARTED)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    TreeMap<Integer, Map<Integer, Long>> checkpoints1 = new TreeMap<>();
    checkpoints1.put(0, ImmutableMap.of(0, 0L, 2, 0L));
    TreeMap<Integer, Map<Integer, Long>> checkpoints2 = new TreeMap<>();
    checkpoints2.put(0, ImmutableMap.of(1, 0L));
    // there would be 4 tasks, 2 for each task group
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints1)).times(2);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints2)).times(2);
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(tasks).anyTimes();
    for (Task task : tasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    EasyMock.replay(taskStorage);
    EasyMock.replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    // test that a task succeeding causes a new task to be re-queued with the next offset range and causes any replica
    // tasks to be shutdown
    Capture<Task> newTasksCapture = Capture.newInstance(CaptureType.ALL);
    Capture<String> shutdownTaskIdCapture = Capture.newInstance();
    List<Task> imStillRunning = tasks.subList(1, 4);
    KafkaIndexTask iAmSuccess = (KafkaIndexTask) tasks.get(0);
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskQueue);
    EasyMock.reset(taskClient);
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(imStillRunning).anyTimes();
    for (Task task : imStillRunning) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    EasyMock.expect(taskStorage.getStatus(iAmSuccess.getId())).andReturn(Optional.of(TaskStatus.success(iAmSuccess.getId())));
    EasyMock.expect(taskStorage.getTask(iAmSuccess.getId())).andReturn(Optional.of(iAmSuccess)).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(newTasksCapture))).andReturn(true).times(2);
    EasyMock.expect(taskClient.stopAsync(EasyMock.capture(shutdownTaskIdCapture), EasyMock.eq(false))).andReturn(Futures.immediateFuture(true));
    EasyMock.replay(taskStorage);
    EasyMock.replay(taskQueue);
    EasyMock.replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    // make sure we killed the right task (sequenceName for replicas are the same)
    Assert.assertTrue(shutdownTaskIdCapture.getValue().contains(iAmSuccess.getIOConfig().getBaseSequenceName()));
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) TreeMap(java.util.TreeMap) Executor(java.util.concurrent.Executor) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Map(java.util.Map) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Aggregations

KafkaDataSourceMetadata (org.apache.druid.indexing.kafka.KafkaDataSourceMetadata)33 Test (org.junit.Test)33 KafkaIndexTask (org.apache.druid.indexing.kafka.KafkaIndexTask)30 Executor (java.util.concurrent.Executor)25 TaskRunnerListener (org.apache.druid.indexing.overlord.TaskRunnerListener)25 RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask)20 Task (org.apache.druid.indexing.common.task.Task)20 HashMap (java.util.HashMap)17 ImmutableMap (com.google.common.collect.ImmutableMap)16 Map (java.util.Map)16 TreeMap (java.util.TreeMap)16 ArrayList (java.util.ArrayList)10 Collection (java.util.Collection)10 TaskLocation (org.apache.druid.indexer.TaskLocation)10 KafkaIndexTaskIOConfig (org.apache.druid.indexing.kafka.KafkaIndexTaskIOConfig)9 DateTime (org.joda.time.DateTime)7 Period (org.joda.time.Period)5 TaskReportData (org.apache.druid.indexing.seekablestream.supervisor.TaskReportData)3 SeekableStreamStartSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers)2 File (java.io.File)1