Search in sources :

Example 6 with SeekableStreamEndSequenceNumbers

use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.

the class KinesisIndexTaskTest method testMultipleParseExceptionsFailure.

@Test(timeout = 120_000L)
public void testMultipleParseExceptionsFailure() throws Exception {
    reportParseExceptions = false;
    maxParseExceptions = 2;
    maxSavedParseExceptions = 2;
    recordSupplier.assign(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(recordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    recordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(2, 13)).once();
    recordSupplier.close();
    EasyMock.expectLastCall().once();
    replayAll();
    final KinesisIndexTask task = createTask(null, new KinesisIndexTaskIOConfig(0, "sequence0", new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "2"), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "9")), true, null, null, INPUT_FORMAT, "awsEndpoint", null, null, null, null, false));
    final ListenableFuture<TaskStatus> future = runTask(task);
    TaskStatus status = future.get();
    // Wait for task to exit
    Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
    verifyAll();
    IndexTaskTest.checkTaskStatusErrorMsgForParseExceptionsExceeded(status);
    // Check metrics
    Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessed());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getProcessedWithError());
    Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
    // Check published metadata
    Assert.assertEquals(ImmutableList.of(), publishedDescriptors());
    Assert.assertNull(newDataSchemaMetadata());
    IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
    Map<String, Object> expectedMetrics = ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, ImmutableMap.of(RowIngestionMeters.PROCESSED, 3, RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.UNPARSEABLE, 3, RowIngestionMeters.THROWN_AWAY, 0));
    Assert.assertEquals(expectedMetrics, reportData.getRowStats());
    List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
    List<String> expectedMessages = Arrays.asList("Unable to parse [] as the intermediateRow resulted in empty input row (Record: 1)", "Unable to parse row [unparseable] (Record: 1)");
    List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
        return ((List<String>) r.get("details")).get(0);
    }).collect(Collectors.toList());
    Assert.assertEquals(expectedMessages, actualMessages);
    List<String> expectedInputs = Arrays.asList("", "unparseable");
    List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
        return (String) r.get("input");
    }).collect(Collectors.toList());
    Assert.assertEquals(expectedInputs, actualInputs);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) DirectQueryProcessingPool(org.apache.druid.query.DirectQueryProcessingPool) Arrays(java.util.Arrays) LookupNodeService(org.apache.druid.discovery.LookupNodeService) TestDataSegmentAnnouncer(org.apache.druid.indexing.test.TestDataSegmentAnnouncer) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) NoopJoinableFactory(org.apache.druid.segment.join.NoopJoinableFactory) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) AfterClass(org.junit.AfterClass) Execs(org.apache.druid.java.util.common.concurrent.Execs) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) CacheConfig(org.apache.druid.client.cache.CacheConfig) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) Executors(java.util.concurrent.Executors) TaskState(org.apache.druid.indexer.TaskState) TestDerbyConnector(org.apache.druid.metadata.TestDerbyConnector) TaskActionClientFactory(org.apache.druid.indexing.common.actions.TaskActionClientFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) SeekableStreamIndexTaskTestBase(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTestBase) DruidNodeAnnouncer(org.apache.druid.discovery.DruidNodeAnnouncer) RunWith(org.junit.runner.RunWith) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) TaskStatus(org.apache.druid.indexer.TaskStatus) LinkedHashMap(java.util.LinkedHashMap) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) AuthTestUtils(org.apache.druid.server.security.AuthTestUtils) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) Nullable(javax.annotation.Nullable) Before(org.junit.Before) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) Executor(java.util.concurrent.Executor) DataSegmentServerAnnouncer(org.apache.druid.server.coordination.DataSegmentServerAnnouncer) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) Futures(com.google.common.util.concurrent.Futures) TreeMap(java.util.TreeMap) DefaultQueryRunnerFactoryConglomerate(org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate) DruidNode(org.apache.druid.server.DruidNode) Named(com.google.inject.name.Named) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) Module(com.fasterxml.jackson.databind.Module) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) TimeoutException(java.util.concurrent.TimeoutException) TaskResource(org.apache.druid.indexing.common.task.TaskResource) SeekableStreamSupervisor(org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor) SequenceMetadata(org.apache.druid.indexing.seekablestream.SequenceMetadata) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Task(org.apache.druid.indexing.common.task.Task) After(org.junit.After) ServerType(org.apache.druid.server.coordination.ServerType) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) DerbyMetadataStorageActionHandlerFactory(org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory) Parameterized(org.junit.runners.Parameterized) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DateTimes(org.apache.druid.java.util.common.DateTimes) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) TestUtils(org.apache.druid.indexing.common.TestUtils) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) Objects(java.util.Objects) DataNodeService(org.apache.druid.discovery.DataNodeService) List(java.util.List) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) KinesisSupervisor(org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisor) MetadataTaskStorage(org.apache.druid.indexing.overlord.MetadataTaskStorage) MapCache(org.apache.druid.client.cache.MapCache) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) BeforeClass(org.junit.BeforeClass) SingleFileTaskReportFileWriter(org.apache.druid.indexing.common.SingleFileTaskReportFileWriter) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashSet(java.util.HashSet) JsonTypeName(com.fasterxml.jackson.annotation.JsonTypeName) ImmutableList(com.google.common.collect.ImmutableList) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) Period(org.joda.time.Period) TestAppenderatorsManager(org.apache.druid.indexing.common.task.TestAppenderatorsManager) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) AWSCredentialsConfig(org.apache.druid.common.aws.AWSCredentialsConfig) TestDataSegmentKiller(org.apache.druid.indexing.test.TestDataSegmentKiller) TimeUnit(java.util.concurrent.TimeUnit) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) MonitorScheduler(org.apache.druid.java.util.metrics.MonitorScheduler) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) AsyncFunction(com.google.common.util.concurrent.AsyncFunction) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) Collections(java.util.Collections) SeekableStreamIndexTaskRunner(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner) TemporaryFolder(org.junit.rules.TemporaryFolder) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) TaskStatus(org.apache.druid.indexer.TaskStatus) LinkedHashMap(java.util.LinkedHashMap) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest)

Example 7 with SeekableStreamEndSequenceNumbers

use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.

the class KafkaSupervisorTest method testCreateBaseTaskContexts.

@Test
public void testCreateBaseTaskContexts() throws JsonProcessingException {
    supervisor = getTestableSupervisor(1, 1, true, "PT1H", null, null);
    final Map<String, Object> contexts = supervisor.createIndexTasks(1, "seq", OBJECT_MAPPER, new TreeMap<>(), new KafkaIndexTaskIOConfig(0, "seq", new SeekableStreamStartSequenceNumbers<>("test", Collections.emptyMap(), Collections.emptySet()), new SeekableStreamEndSequenceNumbers<>("test", Collections.emptyMap()), Collections.emptyMap(), null, null, null, null, INPUT_FORMAT), new KafkaIndexTaskTuningConfig(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), null).get(0).getContext();
    final Boolean contextValue = (Boolean) contexts.get("IS_INCREMENTAL_HANDOFF_SUPPORTED");
    Assert.assertNotNull(contextValue);
    Assert.assertTrue(contextValue);
}
Also used : KafkaIndexTaskIOConfig(org.apache.druid.indexing.kafka.KafkaIndexTaskIOConfig) KafkaIndexTaskTuningConfig(org.apache.druid.indexing.kafka.KafkaIndexTaskTuningConfig) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test)

Example 8 with SeekableStreamEndSequenceNumbers

use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.

the class KafkaSupervisorTest method testGetOffsetFromStorageForPartitionWithResetOffsetAutomatically.

@Test
public void testGetOffsetFromStorageForPartitionWithResetOffsetAutomatically() throws Exception {
    addSomeEvents(2);
    supervisor = getTestableSupervisor(1, 1, true, true, "PT1H", null, null);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    EasyMock.reset(indexerMetadataStorageCoordinator);
    // unknown DataSourceMetadata in metadata store
    // for simplicity in testing the offset availability check, we use negative stored offsets in metadata here,
    // because the stream's earliest offset is 0, although that would not happen in real usage.
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(1, -100L, 2, 200L)))).times(4);
    // getOffsetFromStorageForPartition() throws an exception when the offsets are automatically reset.
    // Since getOffsetFromStorageForPartition() is called per partition, all partitions can't be reset at the same time.
    // Instead, subsequent partitions will be reset in the following supervisor runs.
    EasyMock.expect(indexerMetadataStorageCoordinator.resetDataSourceMetadata(DATASOURCE, new KafkaDataSourceMetadata(// Only one partition is reset in a single supervisor run.
    new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(2, 200L))))).andReturn(true);
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) Executor(java.util.concurrent.Executor) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test)

Example 9 with SeekableStreamEndSequenceNumbers

use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.

the class KinesisSupervisorTest method testCheckpointForInactiveTaskGroup.

@Test(timeout = 60_000L)
public void testCheckpointForInactiveTaskGroup() throws InterruptedException {
    supervisor = getTestableSupervisor(2, 1, true, "PT1S", null, null, false);
    // not adding any events
    final KinesisIndexTask id1;
    id1 = createKinesisIndexTask("id1", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, "0", SHARD_ID0, "0"), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, SHARD_ID0, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)), null, null);
    final Task id2 = createKinesisIndexTask("id2", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, "10", SHARD_ID0, "20"), ImmutableSet.of(SHARD_ID0, SHARD_ID1)), new SeekableStreamEndSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, SHARD_ID0, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)), null, null);
    final Task id3 = createKinesisIndexTask("id3", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, "10", SHARD_ID0, "20"), ImmutableSet.of(SHARD_ID0, SHARD_ID1)), new SeekableStreamEndSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, SHARD_ID0, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)), null, null);
    final TaskLocation location1 = new TaskLocation("testHost", 1234, -1);
    final TaskLocation location2 = new TaskLocation("testHost2", 145, -1);
    Collection workItems = new ArrayList<>();
    workItems.add(new TestTaskRunnerWorkItem(id1, null, location1));
    workItems.add(new TestTaskRunnerWorkItem(id2, null, location2));
    workItems.add(new TestTaskRunnerWorkItem(id2, null, location2));
    supervisorRecordSupplier.assign(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID1, SHARD_ID0)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD1_PARTITION, SHARD0_PARTITION)).anyTimes();
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD1_PARTITION)).andReturn("12").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD0_PARTITION)).andReturn("1").anyTimes();
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of(id1, id2, id3)).anyTimes();
    EasyMock.expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
    EasyMock.expect(taskStorage.getStatus("id2")).andReturn(Optional.of(TaskStatus.running("id2"))).anyTimes();
    EasyMock.expect(taskStorage.getStatus("id3")).andReturn(Optional.of(TaskStatus.running("id3"))).anyTimes();
    EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(id1)).anyTimes();
    EasyMock.expect(taskStorage.getTask("id2")).andReturn(Optional.of(id2)).anyTimes();
    EasyMock.expect(taskStorage.getTask("id3")).andReturn(Optional.of(id3)).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(null)).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync("id1")).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.READING));
    EasyMock.expect(taskClient.getStatusAsync("id2")).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.READING));
    EasyMock.expect(taskClient.getStatusAsync("id3")).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.READING));
    final DateTime startTime = DateTimes.nowUtc();
    EasyMock.expect(taskClient.getStartTimeAsync("id1")).andReturn(Futures.immediateFuture(startTime));
    EasyMock.expect(taskClient.getStartTimeAsync("id2")).andReturn(Futures.immediateFuture(startTime));
    EasyMock.expect(taskClient.getStartTimeAsync("id3")).andReturn(Futures.immediateFuture(startTime));
    final TreeMap<Integer, Map<String, String>> checkpoints = new TreeMap<>();
    checkpoints.put(0, ImmutableMap.of(SHARD_ID1, "10", SHARD_ID0, "20"));
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id2"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id3"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    supervisor.moveTaskGroupToPendingCompletion(0);
    supervisor.checkpoint(0, new KinesisDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(STREAM, checkpoints.get(0), checkpoints.get(0).keySet())));
    while (supervisor.getNoticesQueueSize() > 0) {
        Thread.sleep(100);
    }
    verifyAll();
    Assert.assertNull(serviceEmitter.getStackTrace(), serviceEmitter.getStackTrace());
    Assert.assertNull(serviceEmitter.getExceptionMessage(), serviceEmitter.getExceptionMessage());
    Assert.assertNull(serviceEmitter.getExceptionClass());
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) TaskLocation(org.apache.druid.indexer.TaskLocation) DateTime(org.joda.time.DateTime) Executor(java.util.concurrent.Executor) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) Collection(java.util.Collection) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test)

Example 10 with SeekableStreamEndSequenceNumbers

use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.

the class KinesisSupervisorTest method testShardMergePhaseThree.

/**
 * Test task creation after a shard merge with two closed shards and one open shard, with the closed shards
 * expiring and no longer being returned from record supplier.
 *
 * @param phaseTwoTasks List of tasks from the second phase where closed but not expired shards were present.
 */
private void testShardMergePhaseThree(List<Task> phaseTwoTasks) throws Exception {
    EasyMock.reset(indexerMetadataStorageCoordinator);
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskQueue);
    EasyMock.reset(taskClient);
    EasyMock.reset(taskMaster);
    EasyMock.reset(taskRunner);
    EasyMock.reset(supervisorRecordSupplier);
    // second set of tasks ran, shard 0 has expired, but shard 1 and 2 have data
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER, SHARD_ID1, KinesisSequenceNumber.END_OF_SHARD_MARKER, SHARD_ID2, "100")))).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.resetDataSourceMetadata(DATASOURCE, new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.EXPIRED_MARKER, SHARD_ID1, KinesisSequenceNumber.EXPIRED_MARKER, SHARD_ID2, "100"))))).andReturn(true).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID2)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD2_PARTITION)).anyTimes();
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2))).andReturn("200").anyTimes();
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    Capture<Task> postSplitCaptured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    Task successfulTask0 = phaseTwoTasks.get(0);
    EasyMock.expect(taskStorage.getStatus(successfulTask0.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask0.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask0.getId())).andReturn(Optional.of(successfulTask0)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(postSplitCaptured))).andReturn(true).times(1);
    replayAll();
    supervisor.runInternal();
    verifyAll();
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskClient);
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
    checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID2, "100"));
    // there would be 1 task, only task group 0 has a shard
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup0)).times(1);
    List<Task> postSplitTasks = postSplitCaptured.getValues();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postSplitTasks).anyTimes();
    for (Task task : postSplitTasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    EasyMock.replay(taskStorage);
    EasyMock.replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    // Check that shardId-000000000000 which has hit EOS is not included in the sequences sent to the task for group 0
    SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, "100"), ImmutableSet.of(SHARD_ID2));
    SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
    Assert.assertEquals(1, postSplitTasks.size());
    KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postSplitTasks.get(0)).getIOConfig();
    Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
    Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
    Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());
    Map<Integer, Set<String>> expectedPartitionGroups = ImmutableMap.of(0, ImmutableSet.of(SHARD_ID2), 1, ImmutableSet.of());
    ConcurrentHashMap<String, String> expectedPartitionOffsets = new ConcurrentHashMap<>(ImmutableMap.of(SHARD_ID2, "-1", SHARD_ID1, "-1", SHARD_ID0, "-1"));
    Assert.assertEquals(expectedPartitionGroups, supervisor.getPartitionGroups());
    Assert.assertEquals(expectedPartitionOffsets, supervisor.getPartitionOffsets());
}
Also used : StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) TreeMap(java.util.TreeMap) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) KinesisIndexTaskIOConfig(org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig)

Aggregations

SeekableStreamEndSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers)19 SeekableStreamStartSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers)18 Test (org.junit.Test)15 IndexTaskTest (org.apache.druid.indexing.common.task.IndexTaskTest)12 TaskStatus (org.apache.druid.indexer.TaskStatus)11 ImmutableMap (com.google.common.collect.ImmutableMap)10 HashMap (java.util.HashMap)10 Map (java.util.Map)10 TreeMap (java.util.TreeMap)10 Task (org.apache.druid.indexing.common.task.Task)10 SegmentDescriptor (org.apache.druid.query.SegmentDescriptor)7 ExpressionTransform (org.apache.druid.segment.transform.ExpressionTransform)7 TransformSpec (org.apache.druid.segment.transform.TransformSpec)7 ImmutableList (com.google.common.collect.ImmutableList)6 ImmutableSet (com.google.common.collect.ImmutableSet)6 HashSet (java.util.HashSet)6 List (java.util.List)6 Set (java.util.Set)6 SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter)6 DataSchema (org.apache.druid.segment.indexing.DataSchema)6