Search in sources :

Example 21 with KafkaIndexTask

use of org.apache.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.

In class KafkaSupervisorTest, method testLateMessageRejectionPeriod:

/**
 * Checks the minimum message time carried by the tasks the supervisor submits.
 *
 * <p>The supervisor is built via {@code getTestableSupervisor} with {@code new Period("PT1H")} as
 * one of its arguments (presumably the late-message rejection period - confirm against the helper's
 * signature). Two tasks are expected on the task queue; both are captured and their
 * {@code minimumMessageTime} is asserted to be roughly one hour in the past and identical across
 * the two tasks.
 */
@Test
public void testLateMessageRejectionPeriod() throws Exception {
    supervisor = getTestableSupervisor(2, 1, true, "PT1H", new Period("PT1H"), null);
    addSomeEvents(1);
    // CaptureType.ALL retains every captured argument, so both submitted tasks can be inspected.
    Capture<KafkaIndexTask> captured = Capture.newInstance(CaptureType.ALL);
    // Record phase: a task queue is available, there is no task runner, no active tasks exist,
    // and the stored datasource metadata is a KafkaDataSourceMetadata built from null.
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    // Exactly two tasks must be added to the queue.
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(2);
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    KafkaIndexTask task1 = captured.getValues().get(0);
    KafkaIndexTask task2 = captured.getValues().get(1);
    // The minimum message time must be more than 59 minutes but less than 61 minutes before now.
    Assert.assertTrue("minimumMessageTime", task1.getIOConfig().getMinimumMessageTime().get().plusMinutes(59).isBeforeNow());
    Assert.assertTrue("minimumMessageTime", task1.getIOConfig().getMinimumMessageTime().get().plusMinutes(61).isAfterNow());
    // Both captured tasks must carry the same minimum message time.
    Assert.assertEquals(task1.getIOConfig().getMinimumMessageTime().get(), task2.getIOConfig().getMinimumMessageTime().get());
}
Also used : KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) Period(org.joda.time.Period) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Test(org.junit.Test)

Example 22 with KafkaIndexTask

use of org.apache.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.

In class KafkaSupervisorTest, method testAlwaysUsesEarliestOffsetForNewlyDiscoveredPartitions:

/**
 * Runs the supervisor three times and checks the start sequence numbers of the tasks it submits.
 *
 * <p>After the first run the captured task must start at sequence number 10 for partitions 0, 1
 * and 2. More events are then added via {@code addMoreEvents(9, 6)} (presumably growing the topic
 * to six partitions - confirm against the helper), the supervisor is run twice more, and the task
 * captured on the last run must start at sequence number 0 for partitions 3, 4 and 5.
 */
@Test
public void testAlwaysUsesEarliestOffsetForNewlyDiscoveredPartitions() throws Exception {
    supervisor = getTestableSupervisor(1, 1, false, "PT1H", null, null);
    addSomeEvents(9);
    Capture<KafkaIndexTask> captured = Capture.newInstance();
    // Record phase for the first run: a task queue is available, there is no task runner, no active
    // tasks exist, and the stored datasource metadata is a KafkaDataSourceMetadata built from null.
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    // A single task submission is expected on the first run.
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true);
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    // First run: partitions 0-2 must each start at sequence number 10.
    KafkaIndexTask task = captured.getValue();
    Assert.assertEquals(10, task.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0).longValue());
    Assert.assertEquals(10, task.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(1).longValue());
    Assert.assertEquals(10, task.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2).longValue());
    addMoreEvents(9, 6);
    // Second run: only taskQueue and taskStorage are reset and re-recorded; the other mocks keep
    // the expectations recorded above. The task captured into tmp is never inspected.
    EasyMock.reset(taskQueue, taskStorage);
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    Capture<KafkaIndexTask> tmp = Capture.newInstance();
    EasyMock.expect(taskQueue.add(EasyMock.capture(tmp))).andReturn(true);
    EasyMock.replay(taskStorage, taskQueue);
    supervisor.runInternal();
    verifyAll();
    // Third run: same reset/re-record cycle; this time the submitted task is kept for the assertions.
    EasyMock.reset(taskQueue, taskStorage);
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    Capture<KafkaIndexTask> newcaptured = Capture.newInstance();
    EasyMock.expect(taskQueue.add(EasyMock.capture(newcaptured))).andReturn(true);
    EasyMock.replay(taskStorage, taskQueue);
    supervisor.runInternal();
    verifyAll();
    // Check that partitions 3-5 start from the earliest offset (sequence number 0).
    task = newcaptured.getValue();
    Assert.assertEquals(0, task.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(3).longValue());
    Assert.assertEquals(0, task.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(4).longValue());
    Assert.assertEquals(0, task.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().get(5).longValue());
}
Also used : KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Test(org.junit.Test)

Example 23 with KafkaIndexTask

use of org.apache.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.

In class KafkaSupervisor, method createIndexTasks:

/**
 * Creates {@code replicas} {@link KafkaIndexTask} instances that all belong to the same sequence.
 *
 * <p>Every task gets its own random id prefixed with {@code baseSequenceName} and its own
 * {@link TaskResource} keyed by {@code baseSequenceName}; all tasks share one context map that
 * holds the serialized {@code sequenceOffsets} under {@code CHECKPOINTS_CTX_KEY}.
 *
 * @param replicas                  number of tasks to create
 * @param baseSequenceName          prefix for the task ids and availability group of the task resource
 * @param sortingMapper             mapper used to serialize the checkpoints; also passed to each task
 * @param sequenceOffsets           checkpoints to serialize into the task context
 * @param taskIoConfig              IO config, cast to {@link KafkaIndexTaskIOConfig}
 * @param taskTuningConfig          tuning config, cast to {@link KafkaIndexTaskTuningConfig}
 * @param rowIngestionMetersFactory not used by this implementation
 * @return the newly created tasks, in creation order
 * @throws JsonProcessingException if the checkpoints cannot be serialized
 */
@Override
protected List<SeekableStreamIndexTask<Integer, Long, KafkaRecordEntity>> createIndexTasks(
    int replicas,
    String baseSequenceName,
    ObjectMapper sortingMapper,
    TreeMap<Integer, Map<Integer, Long>> sequenceOffsets,
    SeekableStreamIndexTaskIOConfig taskIoConfig,
    SeekableStreamIndexTaskTuningConfig taskTuningConfig,
    RowIngestionMetersFactory rowIngestionMetersFactory
) throws JsonProcessingException {
    final String serializedCheckpoints = sortingMapper.writerFor(CHECKPOINTS_TYPE_REF).writeValueAsString(sequenceOffsets);

    final Map<String, Object> taskContext = createBaseTaskContexts();
    taskContext.put(CHECKPOINTS_CTX_KEY, serializedCheckpoints);
    // Since 0.16.0 the Kafka index task always performs incremental handoff. The flag is still
    // written so that a cluster downgraded to a release older than 0.16.0 keeps working: without
    // it, the Kafka index task would pick up LegacyKafkaIndexTaskRunner.
    taskContext.put("IS_INCREMENTAL_HANDOFF_SUPPORTED", true);

    final List<SeekableStreamIndexTask<Integer, Long, KafkaRecordEntity>> replicaTasks = new ArrayList<>();
    // One task is appended per pass, so the list size doubles as the replica counter.
    while (replicaTasks.size() < replicas) {
        final String replicaTaskId = IdUtils.getRandomIdWithPrefix(baseSequenceName);
        final TaskResource replicaResource = new TaskResource(baseSequenceName, 1);
        final KafkaIndexTask replicaTask = new KafkaIndexTask(
            replicaTaskId,
            replicaResource,
            spec.getDataSchema(),
            (KafkaIndexTaskTuningConfig) taskTuningConfig,
            (KafkaIndexTaskIOConfig) taskIoConfig,
            taskContext,
            sortingMapper
        );
        replicaTasks.add(replicaTask);
    }
    return replicaTasks;
}
Also used : KafkaIndexTaskIOConfig(org.apache.druid.indexing.kafka.KafkaIndexTaskIOConfig) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) TaskResource(org.apache.druid.indexing.common.task.TaskResource) SeekableStreamIndexTask(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask) KafkaIndexTaskTuningConfig(org.apache.druid.indexing.kafka.KafkaIndexTaskTuningConfig) ArrayList(java.util.ArrayList)

Aggregations

- KafkaIndexTask (org.apache.druid.indexing.kafka.KafkaIndexTask): 23
- Test (org.junit.Test): 22
- KafkaDataSourceMetadata (org.apache.druid.indexing.kafka.KafkaDataSourceMetadata): 21
- Executor (java.util.concurrent.Executor): 13
- TaskRunnerListener (org.apache.druid.indexing.overlord.TaskRunnerListener): 13
- RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask): 11
- Task (org.apache.druid.indexing.common.task.Task): 11
- KafkaIndexTaskIOConfig (org.apache.druid.indexing.kafka.KafkaIndexTaskIOConfig): 10
- HashMap (java.util.HashMap): 9
- ImmutableMap (com.google.common.collect.ImmutableMap): 8
- Map (java.util.Map): 8
- TreeMap (java.util.TreeMap): 8
- ArrayList (java.util.ArrayList): 7
- Collection (java.util.Collection): 6
- TaskLocation (org.apache.druid.indexer.TaskLocation): 6
- Period (org.joda.time.Period): 4
- DateTime (org.joda.time.DateTime): 3
- File (java.io.File): 2
- SeekableStreamStartSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers): 2
- TaskReportData (org.apache.druid.indexing.seekablestream.supervisor.TaskReportData): 2