Search in sources :

Example 1 with KafkaDataSourceMetadata

use of io.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testLatestOffset.

/**
 * Test generating the starting offsets from the partition high water marks in Kafka.
 *
 * <p>After 1100 events are added and the stored metadata is a {@code KafkaDataSourceMetadata}
 * constructed with {@code null}, the single queued task is expected to start at offset 1100 on
 * partitions 0, 1 and 2.
 */
@Test
public void testLatestOffset() throws Exception {
    supervisor = getSupervisor(1, 1, false, "PT1H", null);
    addSomeEvents(1100);

    // Collaborators: a task queue but no task runner, no active tasks, and exactly one task
    // submission, which is captured for inspection below.
    Capture<KafkaIndexTask> queuedTask = Capture.newInstance();
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE))
        .andReturn(new KafkaDataSourceMetadata(null))
        .anyTimes();
    expect(taskQueue.add(capture(queuedTask))).andReturn(true);
    replayAll();

    supervisor.start();
    supervisor.runInternal();
    verifyAll();

    // Every one of the three partitions must start at the same offset.
    KafkaIndexTask task = queuedTask.getValue();
    for (int partition = 0; partition <= 2; partition++) {
        Assert.assertEquals(1100L, (long) task.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(partition));
    }
}
Also used : RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) TaskRunner(io.druid.indexing.overlord.TaskRunner) Test(org.junit.Test)

Example 2 with KafkaDataSourceMetadata

use of io.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisor method resetInternal.

/**
 * Resets the offsets tracked for this supervisor's dataSource.
 *
 * <ul>
 *   <li>{@code null} argument: the whole dataSource metadata entry is deleted and the task groups
 *       for every key of {@code taskGroups} are killed.</li>
 *   <li>{@link KafkaDataSourceMetadata} argument whose topic equals the supervisor's topic: the
 *       given partitions are subtracted from the stored metadata (when an entry exists) and the
 *       task groups for those partitions are killed. Nothing happens when no listed partition
 *       has a stored offset and no task group sits exactly at the requested reset offset.</li>
 *   <li>{@link KafkaDataSourceMetadata} argument for a different topic: a warning is logged.</li>
 * </ul>
 *
 * @param dataSourceMetadata partitions to reset, or {@code null} to reset everything
 * @throws IAE if the argument, or the entry read from the metadata store, is not a
 *             {@link KafkaDataSourceMetadata}
 * @throws ISE if the metadata store reports that the update did not succeed
 */
@VisibleForTesting
void resetInternal(DataSourceMetadata dataSourceMetadata) {
    // Full reset: drop the metadata entry and kill all task groups.
    if (dataSourceMetadata == null) {
        boolean deleted = indexerMetadataStorageCoordinator.deleteDataSourceMetadata(dataSource);
        log.info("Reset dataSource[%s] - dataSource metadata entry deleted? [%s]", dataSource, deleted);
        killTaskGroupForPartitions(JavaCompatUtils.keySet(taskGroups));
        return;
    }

    if (!(dataSourceMetadata instanceof KafkaDataSourceMetadata)) {
        throw new IAE("Expected KafkaDataSourceMetadata but found instance of [%s]", dataSourceMetadata.getClass());
    }

    // Partial reset: only the partitions named in the request, and only if still needed.
    final KafkaDataSourceMetadata resetKafkaMetadata = (KafkaDataSourceMetadata) dataSourceMetadata;
    if (!resetKafkaMetadata.getKafkaPartitions().getTopic().equals(ioConfig.getTopic())) {
        log.warn("Reset metadata topic [%s] and supervisor's topic [%s] do not match", resetKafkaMetadata.getKafkaPartitions().getTopic(), ioConfig.getTopic());
        return;
    }

    // The store may legitimately have no entry yet, in which case this is null.
    final DataSourceMetadata metadata = indexerMetadataStorageCoordinator.getDataSourceMetadata(dataSource);
    if (!(metadata == null || metadata instanceof KafkaDataSourceMetadata)) {
        throw new IAE("Expected KafkaDataSourceMetadata from metadata store but found instance of [%s]", metadata.getClass());
    }
    final KafkaDataSourceMetadata storedMetadata = (KafkaDataSourceMetadata) metadata;

    // Guard against consecutive reset requests from replicas, and against the case where the
    // metadata store does not have an entry for the partitions being reset.
    boolean resetNeeded = false;
    for (Map.Entry<Integer, Long> requested : resetKafkaMetadata.getKafkaPartitions().getPartitionOffsetMap().entrySet()) {
        final Long storedOffset;
        if (storedMetadata == null) {
            storedOffset = null;
        } else {
            storedOffset = storedMetadata.getKafkaPartitions().getPartitionOffsetMap().get(requested.getKey());
        }
        final TaskGroup group = taskGroups.get(getTaskGroupIdForPartition(requested.getKey()));

        if (storedOffset != null) {
            resetNeeded = true;
            break;
        }
        if (group != null && group.partitionOffsets.get(requested.getKey()).equals(requested.getValue())) {
            resetNeeded = true;
            break;
        }
    }
    if (!resetNeeded) {
        return;
    }

    // With no stored entry there is nothing to rewrite, so the update counts as successful.
    boolean metadataUpdated = storedMetadata == null;
    if (storedMetadata != null) {
        final DataSourceMetadata remainingMetadata = storedMetadata.minus(resetKafkaMetadata);
        try {
            metadataUpdated = indexerMetadataStorageCoordinator.resetDataSourceMetadata(dataSource, remainingMetadata);
        } catch (IOException e) {
            log.error("Resetting DataSourceMetadata failed [%s]", e.getMessage());
            Throwables.propagate(e);
        }
    }

    if (!metadataUpdated) {
        throw new ISE("Unable to reset metadata");
    }
    killTaskGroupForPartitions(JavaCompatUtils.keySet(resetKafkaMetadata.getKafkaPartitions().getPartitionOffsetMap()));
}
Also used : DataSourceMetadata(io.druid.indexing.overlord.DataSourceMetadata) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) ISE(io.druid.java.util.common.ISE) IOException(java.io.IOException) IAE(io.druid.java.util.common.IAE) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with KafkaDataSourceMetadata

use of io.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testDatasourceMetadata.

/**
 * Test generating the starting offsets from the partition data stored in druid_dataSource which contains the
 * offsets of the last built segments.
 *
 * <p>The metadata store is stubbed to return offsets 10, 20 and 30 for partitions 0, 1 and 2 of
 * {@code KAFKA_TOPIC}; the single queued task must start from exactly those offsets.
 */
@Test
public void testDatasourceMetadata() throws Exception {
    supervisor = getSupervisor(1, 1, true, "PT1H", null);
    addSomeEvents(100);

    // Collaborators: a task queue but no task runner, no active tasks, and exactly one task
    // submission, which is captured for inspection below.
    Capture<KafkaIndexTask> captured = Capture.newInstance();
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(new KafkaPartitions(KAFKA_TOPIC, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L)))).anyTimes();
    expect(taskQueue.add(capture(captured))).andReturn(true);
    replayAll();

    supervisor.start();
    supervisor.runInternal();
    verifyAll();

    KafkaIndexTask task = captured.getValue();
    KafkaIOConfig taskConfig = task.getIOConfig();
    // Plain literal: the previous String.format call had no format specifier, so its DATASOURCE
    // argument was silently ignored and the expected value was always "sequenceName-0".
    Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
    Assert.assertEquals(10L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(0));
    Assert.assertEquals(20L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(1));
    Assert.assertEquals(30L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(2));
}
Also used : RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaPartitions(io.druid.indexing.kafka.KafkaPartitions) KafkaIOConfig(io.druid.indexing.kafka.KafkaIOConfig) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) TaskRunner(io.druid.indexing.overlord.TaskRunner) Test(org.junit.Test)

Example 4 with KafkaDataSourceMetadata

use of io.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testBeginPublishAndQueueNextTasks.

/**
 * Runs the supervisor twice: the first run queues four tasks; the second run sees those tasks
 * as running, pauses the "sequenceName-0" tasks, sets their end offsets, and queues two
 * replacement tasks whose configuration is then checked.
 */
@Test
public void testBeginPublishAndQueueNextTasks() throws Exception {
    final TaskLocation taskLocation = new TaskLocation("testHost", 1234);
    supervisor = getSupervisor(2, 2, true, "PT1M", null);
    addSomeEvents(100);

    // ---- First run: nothing is running yet, four tasks are expected to be queued. ----
    Capture<Task> queuedTasks = Capture.newInstance(CaptureType.ALL);
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    expect(taskRunner.getRunningTasks()).andReturn(Collections.EMPTY_LIST).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE))
        .andReturn(new KafkaDataSourceMetadata(null))
        .anyTimes();
    expect(taskQueue.add(capture(queuedTasks))).andReturn(true).times(4);
    taskRunner.registerListener(anyObject(TaskRunnerListener.class), anyObject(Executor.class));
    replayAll();

    supervisor.start();
    supervisor.runInternal();
    verifyAll();

    // Wrap every queued task in a work item so the runner can report it as running.
    // NOTE(review): the raw Collection is kept as-is; presumably it lets andReturn accept the
    // value against the runner's declared return type - confirm before generifying.
    List<Task> initialTasks = queuedTasks.getValues();
    Collection runningItems = new ArrayList<>();
    for (Task queued : initialTasks) {
        runningItems.add(new TestTaskRunnerWorkItem(queued.getId(), null, taskLocation));
    }

    // ---- Second run: re-arm the mocks with the first run's tasks active and running. ----
    reset(taskStorage, taskRunner, taskClient, taskQueue);
    queuedTasks = Capture.newInstance(CaptureType.ALL);
    expect(taskStorage.getActiveTasks()).andReturn(initialTasks).anyTimes();
    for (Task active : initialTasks) {
        expect(taskStorage.getStatus(active.getId()))
            .andReturn(Optional.of(TaskStatus.running(active.getId())))
            .anyTimes();
        expect(taskStorage.getTask(active.getId())).andReturn(Optional.of(active)).anyTimes();
    }
    expect(taskRunner.getRunningTasks()).andReturn(runningItems).anyTimes();
    expect(taskClient.getStatusAsync(anyString()))
        .andReturn(Futures.immediateFuture(KafkaIndexTask.Status.READING))
        .anyTimes();
    // "sequenceName-0": the first reported start time lies two minutes in the past, the second is now.
    expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-0")))
        .andReturn(Futures.immediateFuture(DateTime.now().minusMinutes(2)))
        .andReturn(Futures.immediateFuture(DateTime.now()));
    expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-1")))
        .andReturn(Futures.immediateFuture(DateTime.now()))
        .times(2);
    // The two pause responses report different offsets; the end offsets expected below are the
    // per-partition maximum of the two maps.
    expect(taskClient.pauseAsync(EasyMock.contains("sequenceName-0")))
        .andReturn(Futures.immediateFuture((Map<Integer, Long>) ImmutableMap.of(0, 10L, 1, 20L, 2, 30L)))
        .andReturn(Futures.immediateFuture((Map<Integer, Long>) ImmutableMap.of(0, 10L, 1, 15L, 2, 35L)));
    expect(taskClient.setEndOffsetsAsync(
               EasyMock.contains("sequenceName-0"),
               EasyMock.eq(ImmutableMap.of(0, 10L, 1, 20L, 2, 35L)),
               EasyMock.eq(true)))
        .andReturn(Futures.immediateFuture(true))
        .times(2);
    expect(taskQueue.add(capture(queuedTasks))).andReturn(true).times(2);
    replay(taskStorage, taskRunner, taskClient, taskQueue);

    supervisor.runInternal();
    verifyAll();

    // Each replacement task must pick up where the paused tasks were told to stop.
    for (Task replacement : queuedTasks.getValues()) {
        KafkaIndexTask kafkaIndexTask = (KafkaIndexTask) replacement;
        Assert.assertEquals(dataSchema, kafkaIndexTask.getDataSchema());
        Assert.assertEquals(KafkaTuningConfig.copyOf(tuningConfig), kafkaIndexTask.getTuningConfig());

        KafkaIOConfig taskConfig = kafkaIndexTask.getIOConfig();
        Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
        Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
        Assert.assertFalse("pauseAfterRead", taskConfig.isPauseAfterRead());
        Assert.assertEquals(KAFKA_TOPIC, taskConfig.getStartPartitions().getTopic());

        Map<Integer, Long> startOffsets = taskConfig.getStartPartitions().getPartitionOffsetMap();
        Assert.assertEquals(10L, (long) startOffsets.get(0));
        Assert.assertEquals(20L, (long) startOffsets.get(1));
        Assert.assertEquals(35L, (long) startOffsets.get(2));
    }
}
Also used : TaskRunnerListener(io.druid.indexing.overlord.TaskRunnerListener) RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) ArrayList(java.util.ArrayList) TaskLocation(io.druid.indexing.common.TaskLocation) Executor(java.util.concurrent.Executor) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIOConfig(io.druid.indexing.kafka.KafkaIOConfig) Collection(java.util.Collection) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 5 with KafkaDataSourceMetadata

use of io.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testMultiTask.

/**
 * Checks how partitions are split across two queued tasks: the first captured task must cover
 * partitions 0 and 2, the second partition 1, each starting at offset 0 and ending at
 * {@code Long.MAX_VALUE}.
 */
@Test
public void testMultiTask() throws Exception {
    supervisor = getSupervisor(1, 2, true, "PT1H", null);
    addSomeEvents(1);

    // Collaborators: a task queue but no task runner, no active tasks, and two task
    // submissions, both captured for inspection below.
    Capture<KafkaIndexTask> queuedTasks = Capture.newInstance(CaptureType.ALL);
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE))
        .andReturn(new KafkaDataSourceMetadata(null))
        .anyTimes();
    expect(taskQueue.add(capture(queuedTasks))).andReturn(true).times(2);
    replayAll();

    supervisor.start();
    supervisor.runInternal();
    verifyAll();

    // First task: two partitions (0 and 2).
    KafkaIndexTask firstTask = queuedTasks.getValues().get(0);
    Map<Integer, Long> firstStart = firstTask.getIOConfig().getStartPartitions().getPartitionOffsetMap();
    Assert.assertEquals(2, firstStart.size());
    Map<Integer, Long> firstEnd = firstTask.getIOConfig().getEndPartitions().getPartitionOffsetMap();
    Assert.assertEquals(2, firstEnd.size());
    Assert.assertEquals(0L, (long) firstStart.get(0));
    Assert.assertEquals(Long.MAX_VALUE, (long) firstEnd.get(0));
    Assert.assertEquals(0L, (long) firstStart.get(2));
    Assert.assertEquals(Long.MAX_VALUE, (long) firstEnd.get(2));

    // Second task: the remaining partition (1).
    KafkaIndexTask secondTask = queuedTasks.getValues().get(1);
    Map<Integer, Long> secondStart = secondTask.getIOConfig().getStartPartitions().getPartitionOffsetMap();
    Assert.assertEquals(1, secondStart.size());
    Map<Integer, Long> secondEnd = secondTask.getIOConfig().getEndPartitions().getPartitionOffsetMap();
    Assert.assertEquals(1, secondEnd.size());
    Assert.assertEquals(0L, (long) secondStart.get(1));
    Assert.assertEquals(Long.MAX_VALUE, (long) secondEnd.get(1));
}
Also used : RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) TaskRunner(io.druid.indexing.overlord.TaskRunner) Test(org.junit.Test)

Aggregations

KafkaDataSourceMetadata (io.druid.indexing.kafka.KafkaDataSourceMetadata)24 RealtimeIndexTask (io.druid.indexing.common.task.RealtimeIndexTask)23 Task (io.druid.indexing.common.task.Task)23 KafkaIndexTask (io.druid.indexing.kafka.KafkaIndexTask)23 Test (org.junit.Test)23 TaskRunnerListener (io.druid.indexing.overlord.TaskRunnerListener)17 Executor (java.util.concurrent.Executor)17 KafkaPartitions (io.druid.indexing.kafka.KafkaPartitions)12 TaskLocation (io.druid.indexing.common.TaskLocation)8 ArrayList (java.util.ArrayList)8 Collection (java.util.Collection)8 ImmutableMap (com.google.common.collect.ImmutableMap)7 KafkaIOConfig (io.druid.indexing.kafka.KafkaIOConfig)7 Map (java.util.Map)7 TaskRunner (io.druid.indexing.overlord.TaskRunner)6 DateTime (org.joda.time.DateTime)4 SupervisorReport (io.druid.indexing.overlord.supervisor.SupervisorReport)3 EasyMock.anyString (org.easymock.EasyMock.anyString)3 DataSourceMetadata (io.druid.indexing.overlord.DataSourceMetadata)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1