Search in sources :

Example 11 with DataSourceMetadata

use of org.apache.druid.indexing.overlord.DataSourceMetadata in project druid by druid-io.

the class SeekableStreamSupervisor method verifyAndMergeCheckpoints.

/**
 * Reconciles the checkpoints stored on a task group with the checkpoints reported by its tasks and with the
 * offsets persisted in the metadata store. This method does two things -
 * 1. Makes sure the checkpoints information in the taskGroup is consistent with that of the tasks, if not kill
 * inconsistent tasks.
 * 2. truncates the checkpoints in the taskGroup corresponding to which segments have been published, so that any newly
 * created tasks for the taskGroup start indexing from after the latest published sequences.
 *
 * Side effects visible in this method: tasks may be killed and removed from {@code taskGroup.tasks},
 * {@code taskGroup.checkpointSequences} may be replaced, and, when no usable task remains, the group is removed
 * from {@code activelyReadingTaskGroups} and the {@code partitionOffsets} of its starting partitions are reset to
 * the not-set marker.
 *
 * @param taskGroup the task group whose tasks are queried and whose checkpoint state is updated in place
 * @throws RuntimeException wrapping any exception raised while waiting for the checkpoint futures
 * @throws IAE if the stored datasource metadata is non-null and fails {@code checkSourceMetadataMatch}
 */
private void verifyAndMergeCheckpoints(final TaskGroup taskGroup) {
    final int groupId = taskGroup.groupId;
    // (taskId, checkpoints) pairs for tasks that reported at least one checkpoint.
    final List<Pair<String, TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>>>> taskSequences = new ArrayList<>();
    // futures and taskIds are filled in lockstep, so index i in one corresponds to index i in the other.
    final List<ListenableFuture<TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>>>> futures = new ArrayList<>();
    final List<String> taskIds = new ArrayList<>();
    // Request the checkpoints of every task in the group asynchronously.
    for (String taskId : taskGroup.taskIds()) {
        // NOTE(review): the meaning of the boolean argument is not visible here (presumably a retry flag) - confirm.
        final ListenableFuture<TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>>> checkpointsFuture = taskClient.getCheckpointsAsync(taskId, true);
        futures.add(checkpointsFuture);
        taskIds.add(taskId);
    }
    try {
        // successfulAsList substitutes null for futures that did not succeed; wait at most futureTimeoutInSeconds.
        List<TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>>> futuresResult = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
        for (int i = 0; i < futuresResult.size(); i++) {
            final TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>> checkpoints = futuresResult.get(i);
            final String taskId = taskIds.get(i);
            if (checkpoints == null) {
                try {
                    // Re-query the individual future so that the exception of a failed future is thrown here.
                    // If the future actually completed with a null value, nothing is thrown and the task is
                    // silently skipped.
                    futures.get(i).get();
                } catch (Exception e) {
                    // Record the failure, then kill the task and drop it from the group.
                    stateManager.recordThrowableEvent(e);
                    log.error(e, "Problem while getting checkpoints for task [%s], killing the task", taskId);
                    killTask(taskId, "Exception[%s] while getting checkpoints", e.getClass());
                    taskGroup.tasks.remove(taskId);
                }
            } else if (checkpoints.isEmpty()) {
                // A task without checkpoints is left alone and takes no part in the verification below.
                log.warn("Ignoring task [%s], as probably it is not started running yet", taskId);
            } else {
                taskSequences.add(new Pair<>(taskId, checkpoints));
            }
        }
    } catch (Exception e) {
        // Any failure of the combined wait above (or of the loop itself) aborts the verification.
        throw new RuntimeException(e);
    }
    // Load the datasource metadata from the metadata store and validate it before the unchecked cast below.
    final DataSourceMetadata rawDataSourceMetadata = indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(dataSource);
    if (rawDataSourceMetadata != null && !checkSourceMetadataMatch(rawDataSourceMetadata)) {
        throw new IAE("Datasource metadata instance does not match required, found instance of [%s]", rawDataSourceMetadata.getClass());
    }
    @SuppressWarnings("unchecked") final SeekableStreamDataSourceMetadata<PartitionIdType, SequenceOffsetType> latestDataSourceMetadata = (SeekableStreamDataSourceMetadata<PartitionIdType, SequenceOffsetType>) rawDataSourceMetadata;
    // Stored offsets are used only if metadata exists, carries sequence numbers, and refers to the same stream
    // as the supervisor's ioConfig.
    final boolean hasValidOffsetsFromDb = latestDataSourceMetadata != null && latestDataSourceMetadata.getSeekableStreamSequenceNumbers() != null && ioConfig.getStream().equals(latestDataSourceMetadata.getSeekableStreamSequenceNumbers().getStream());
    // Per-partition offsets from the metadata store, or null when there are no usable stored offsets.
    final Map<PartitionIdType, SequenceOffsetType> latestOffsetsFromDb;
    if (hasValidOffsetsFromDb) {
        latestOffsetsFromDb = latestDataSourceMetadata.getSeekableStreamSequenceNumbers().getPartitionSequenceNumberMap();
    } else {
        latestOffsetsFromDb = null;
    }
    // Order the tasks of this taskGroup in descending order of the first (lowest) sequence id of their
    // checkpoint maps.
    taskSequences.sort((o1, o2) -> o2.rhs.firstKey().compareTo(o1.rhs.firstKey()));
    final Set<String> tasksToKill = new HashSet<>();
    // -1 means that no task has yet supplied a sequence id that is consistent with the metadata store.
    final AtomicInteger earliestConsistentSequenceId = new AtomicInteger(-1);
    int taskIndex = 0;
    while (taskIndex < taskSequences.size()) {
        TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>> taskCheckpoints = taskSequences.get(taskIndex).rhs;
        String taskId = taskSequences.get(taskIndex).lhs;
        if (earliestConsistentSequenceId.get() == -1) {
            // No reference sequence id yet. Accept this task as the reference if either
            //  (a) one of its checkpoints has, for every partition, an offset equal to the stored offset
            //      (a partition without a stored offset is compared with itself, so it always matches); the
            //      sequence id of the first such checkpoint encountered is stored via compareAndSet, or
            //  (b) the group has pending-completion task groups, in which case the task's first sequence id
            //      is stored.
            // The compareAndSet calls are side effects inside the condition; evaluation order matters.
            if (taskCheckpoints.entrySet().stream().anyMatch(sequenceCheckpoint -> sequenceCheckpoint.getValue().entrySet().stream().allMatch(partitionOffset -> {
                OrderedSequenceNumber<SequenceOffsetType> sequence = makeSequenceNumber(partitionOffset.getValue());
                OrderedSequenceNumber<SequenceOffsetType> latestOffset = makeSequenceNumber(latestOffsetsFromDb == null ? partitionOffset.getValue() : latestOffsetsFromDb.getOrDefault(partitionOffset.getKey(), partitionOffset.getValue()));
                return sequence.compareTo(latestOffset) == 0;
            }) && earliestConsistentSequenceId.compareAndSet(-1, sequenceCheckpoint.getKey())) || (pendingCompletionTaskGroups.getOrDefault(groupId, new CopyOnWriteArrayList<>()).size() > 0 && earliestConsistentSequenceId.compareAndSet(-1, taskCheckpoints.firstKey()))) {
                // Replace the group's checkpoints with this task's checkpoints starting at the reference id.
                final SortedMap<Integer, Map<PartitionIdType, SequenceOffsetType>> latestCheckpoints = new TreeMap<>(taskCheckpoints.tailMap(earliestConsistentSequenceId.get()));
                log.info("Setting taskGroup sequences to [%s] for group [%d]", latestCheckpoints, groupId);
                taskGroup.checkpointSequences.clear();
                taskGroup.checkpointSequences.putAll(latestCheckpoints);
            } else {
                // Neither condition held: this task cannot serve as the reference, so mark it for killing.
                log.debug("Adding task [%s] to kill list, checkpoints[%s], latestoffsets from DB [%s]", taskId, taskCheckpoints, latestOffsetsFromDb);
                tasksToKill.add(taskId);
            }
        } else {
            // A reference exists: check consistency with the taskGroup sequences. The task is inconsistent if it
            // has no checkpoint at the group's first sequence id, if that checkpoint differs from the group's,
            // or if it has a different number of checkpoints from that sequence id onwards.
            if (taskCheckpoints.get(taskGroup.checkpointSequences.firstKey()) == null || !(taskCheckpoints.get(taskGroup.checkpointSequences.firstKey()).equals(taskGroup.checkpointSequences.firstEntry().getValue())) || taskCheckpoints.tailMap(taskGroup.checkpointSequences.firstKey()).size() != taskGroup.checkpointSequences.size()) {
                log.debug("Adding task [%s] to kill list, checkpoints[%s], taskgroup checkpoints [%s]", taskId, taskCheckpoints, taskGroup.checkpointSequences);
                tasksToKill.add(taskId);
            }
        }
        taskIndex++;
    }
    if ((tasksToKill.size() > 0 && tasksToKill.size() == taskGroup.tasks.size()) || (taskGroup.tasks.size() == 0 && pendingCompletionTaskGroups.getOrDefault(groupId, new CopyOnWriteArrayList<>()).size() == 0)) {
        // Either every task of the group is about to be killed, or the group has no tasks and no
        // pending-completion groups. Clear the state kept for the taskgroup so that the latest sequence
        // information is fetched from the metadata store.
        log.warn("Clearing task group [%d] information as no valid tasks left the group", groupId);
        activelyReadingTaskGroups.remove(groupId);
        for (PartitionIdType partitionId : taskGroup.startingSequences.keySet()) {
            partitionOffsets.put(partitionId, getNotSetMarker());
        }
    }
    // Kill every task that was marked inconsistent and remove it from the group.
    taskSequences.stream().filter(taskIdSequences -> tasksToKill.contains(taskIdSequences.lhs)).forEach(sequenceCheckpoint -> {
        killTask(sequenceCheckpoint.lhs, "Killing task [%s], as its checkpoints [%s] are not consistent with group checkpoints[%s] or latest " + "persisted sequences in metadata store [%s]", sequenceCheckpoint.lhs, sequenceCheckpoint.rhs, taskGroup.checkpointSequences, latestOffsetsFromDb);
        taskGroup.tasks.remove(sequenceCheckpoint.lhs);
    });
}
Also used : SeekableStreamIndexTask(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask) Pair(org.apache.druid.java.util.common.Pair) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) TaskQueue(org.apache.druid.indexing.overlord.TaskQueue) Optional(com.google.common.base.Optional) TaskRunner(org.apache.druid.indexing.overlord.TaskRunner) Duration(java.time.Duration) Map(java.util.Map) IAE(org.apache.druid.java.util.common.IAE) Execs(org.apache.druid.java.util.common.concurrent.Execs) SeekableStreamDataSourceMetadata(org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) NotNull(javax.validation.constraints.NotNull) Int2ObjectLinkedOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap) LagStats(org.apache.druid.indexing.overlord.supervisor.autoscaler.LagStats) TaskState(org.apache.druid.indexer.TaskState) Stream(java.util.stream.Stream) Predicate(com.google.common.base.Predicate) RowIngestionMetersFactory(org.apache.druid.segment.incremental.RowIngestionMetersFactory) TaskMaster(org.apache.druid.indexing.overlord.TaskMaster) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) Joiner(com.google.common.base.Joiner) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Iterables(com.google.common.collect.Iterables) SupervisorStateManager(org.apache.druid.indexing.overlord.supervisor.SupervisorStateManager) Callable(java.util.concurrent.Callable) TaskStatus(org.apache.druid.indexer.TaskStatus) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) EntryExistsException(org.apache.druid.metadata.EntryExistsException) SeekableStreamIndexTaskIOConfig(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskIOConfig) StringComparators(org.apache.druid.query.ordering.StringComparators) 
ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) BiConsumer(java.util.function.BiConsumer) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) AutoScalerConfig(org.apache.druid.indexing.seekablestream.supervisor.autoscaler.AutoScalerConfig) RetryUtils(org.apache.druid.java.util.common.RetryUtils) SeekableStreamIndexTaskClientFactory(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskClientFactory) Nullable(javax.annotation.Nullable) SeekableStreamIndexTaskTuningConfig(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTuningConfig) SeekableStreamIndexTaskClient(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskClient) BlockingDeque(java.util.concurrent.BlockingDeque) ServiceMetricEvent(org.apache.druid.java.util.emitter.service.ServiceMetricEvent) TaskLocation(org.apache.druid.indexer.TaskLocation) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Futures(com.google.common.util.concurrent.Futures) TaskInfoProvider(org.apache.druid.indexing.common.TaskInfoProvider) TreeMap(java.util.TreeMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) LinkedBlockingDeque(java.util.concurrent.LinkedBlockingDeque) SupervisorReport(org.apache.druid.indexing.overlord.supervisor.SupervisorReport) Preconditions(com.google.common.base.Preconditions) DataSchema(org.apache.druid.segment.indexing.DataSchema) SeekableStreamSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamSequenceNumbers) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) TimeoutException(java.util.concurrent.TimeoutException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Task(org.apache.druid.indexing.common.task.Task) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DateTimes(org.apache.druid.java.util.common.DateTimes) 
Function(com.google.common.base.Function) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) OrderedSequenceNumber(org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber) StreamException(org.apache.druid.indexing.seekablestream.common.StreamException) List(java.util.List) MetadataSupervisorManager(org.apache.druid.metadata.MetadataSupervisorManager) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) IndexerMetadataStorageCoordinator(org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator) Entry(java.util.Map.Entry) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) SortedMap(java.util.SortedMap) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) HashMap(java.util.HashMap) HashSet(java.util.HashSet) MapperFeature(com.fasterxml.jackson.databind.MapperFeature) ImmutableList(com.google.common.collect.ImmutableList) IndexTaskClient(org.apache.druid.indexing.common.IndexTaskClient) TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) ExecutorService(java.util.concurrent.ExecutorService) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Iterator(java.util.Iterator) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TimeUnit(java.util.concurrent.TimeUnit) 
TaskRunnerWorkItem(org.apache.druid.indexing.overlord.TaskRunnerWorkItem) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DigestUtils(org.apache.commons.codec.digest.DigestUtils) Supervisor(org.apache.druid.indexing.overlord.supervisor.Supervisor) Comparator(java.util.Comparator) Collections(java.util.Collections) SeekableStreamIndexTaskRunner(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) OrderedSequenceNumber(org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) SeekableStreamDataSourceMetadata(org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata) SeekableStreamDataSourceMetadata(org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata) Pair(org.apache.druid.java.util.common.Pair) HashSet(java.util.HashSet) TreeMap(java.util.TreeMap) IAE(org.apache.druid.java.util.common.IAE) EntryExistsException(org.apache.druid.metadata.EntryExistsException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) StreamException(org.apache.druid.indexing.seekablestream.common.StreamException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Map(java.util.Map) Int2ObjectLinkedOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap) TreeMap(java.util.TreeMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) 
CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Example 12 with DataSourceMetadata

use of org.apache.druid.indexing.overlord.DataSourceMetadata in project druid by druid-io.

the class IndexerSQLMetadataStorageCoordinatorTest method testTransactionalAnnounceRetryAndSuccess.

/**
 * Checks that a transactional segment announcement is retried when the datasource metadata update reports
 * TRY_AGAIN, and that the announcement succeeds on the following attempt.
 */
@Test
public void testTransactionalAnnounceRetryAndSuccess() throws IOException {
    // Counts update attempts since the last reset; the first attempt after each reset is answered with TRY_AGAIN.
    final AtomicLong attemptsSinceReset = new AtomicLong();
    final IndexerSQLMetadataStorageCoordinator retryingCoordinator = new IndexerSQLMetadataStorageCoordinator(mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector) {

        @Override
        protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle(Handle handle, String dataSource, DataSourceMetadata startMetadata, DataSourceMetadata endMetadata) throws IOException {
            metadataUpdateCounter.getAndIncrement();
            final boolean firstAttempt = attemptsSinceReset.getAndIncrement() == 0;
            if (firstAttempt) {
                return DataStoreMetadataUpdateResult.TRY_AGAIN;
            }
            return super.updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata);
        }
    };

    final ObjectMetadata emptyMetadata = new ObjectMetadata(null);
    final ObjectMetadata fooBarMetadata = new ObjectMetadata(ImmutableMap.of("foo", "bar"));
    final ObjectMetadata fooBazMetadata = new ObjectMetadata(ImmutableMap.of("foo", "baz"));

    // Publish the first segment: metadata moves from empty to foo=bar.
    final SegmentPublishResult firstPublish = retryingCoordinator.announceHistoricalSegments(
        ImmutableSet.of(defaultSegment),
        ImmutableSet.of(),
        emptyMetadata,
        fooBarMetadata
    );
    Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment)), firstPublish);
    final byte[] expectedFirstPayload = mapper.writeValueAsString(defaultSegment).getBytes(StandardCharsets.UTF_8);
    Assert.assertArrayEquals(
        expectedFirstPayload,
        derbyConnector.lookup(derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(), "id", "payload", defaultSegment.getId().toString())
    );

    // Make the next update attempt fail once more.
    attemptsSinceReset.set(0);

    // Publish the second segment: metadata moves from foo=bar to foo=baz.
    final SegmentPublishResult secondPublish = retryingCoordinator.announceHistoricalSegments(
        ImmutableSet.of(defaultSegment2),
        ImmutableSet.of(),
        fooBarMetadata,
        fooBazMetadata
    );
    Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment2)), secondPublish);
    final byte[] expectedSecondPayload = mapper.writeValueAsString(defaultSegment2).getBytes(StandardCharsets.UTF_8);
    Assert.assertArrayEquals(
        expectedSecondPayload,
        derbyConnector.lookup(derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(), "id", "payload", defaultSegment2.getId().toString())
    );

    // The stored metadata must be the end metadata of the second publish.
    Assert.assertEquals(fooBazMetadata, retryingCoordinator.retrieveDataSourceMetadata("fooDataSource"));

    // Each of the two announcements needs two update attempts (one TRY_AGAIN, one real update).
    Assert.assertEquals(4, metadataUpdateCounter.get());
}
Also used : SegmentPublishResult(org.apache.druid.indexing.overlord.SegmentPublishResult) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) ObjectMetadata(org.apache.druid.indexing.overlord.ObjectMetadata) Handle(org.skife.jdbi.v2.Handle) Test(org.junit.Test)

Aggregations

DataSourceMetadata (org.apache.druid.indexing.overlord.DataSourceMetadata)12 Test (org.junit.Test)6 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)5 MoreExecutors (com.google.common.util.concurrent.MoreExecutors)5 IOException (java.io.IOException)5 List (java.util.List)5 Set (java.util.Set)5 TimeUnit (java.util.concurrent.TimeUnit)5 Collectors (java.util.stream.Collectors)5 DateTimes (org.apache.druid.java.util.common.DateTimes)5 StringUtils (org.apache.druid.java.util.common.StringUtils)5 Execs (org.apache.druid.java.util.common.concurrent.Execs)5 EmittingLogger (org.apache.druid.java.util.emitter.EmittingLogger)5 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableSet (com.google.common.collect.ImmutableSet)4 HashSet (java.util.HashSet)4 Map (java.util.Map)4 Executor (java.util.concurrent.Executor)4