
Example 16 with Committer

Use of org.apache.druid.data.input.Committer in project druid by druid-io.

From the class SeekableStreamIndexTaskRunner, method runInternal:

private TaskStatus runInternal(TaskToolbox toolbox) throws Exception {
    startTime = DateTimes.nowUtc();
    status = Status.STARTING;
    setToolbox(toolbox);
    authorizerMapper = toolbox.getAuthorizerMapper();
    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, tuningConfig.isLogParseExceptions(), tuningConfig.getMaxParseExceptions(), tuningConfig.getMaxSavedParseExceptions());
    // Now we can initialize the StreamChunkParser with the given toolbox.
    final StreamChunkParser parser = new StreamChunkParser<RecordType>(this.parser, inputFormat, inputRowSchema, task.getDataSchema().getTransformSpec(), toolbox.getIndexingTmpDir(), row -> row != null && task.withinMinMaxRecordTime(row), rowIngestionMeters, parseExceptionHandler);
    initializeSequences();
    log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
    toolbox.getChatHandlerProvider().register(task.getId(), this, false);
    runThread = Thread.currentThread();
    // Set up FireDepartmentMetrics
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(task.getDataSchema(), new RealtimeIOConfig(null, null), null);
    this.fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    toolbox.addMonitor(TaskRealtimeMetricsMonitorBuilder.build(task, fireDepartmentForMetrics, rowIngestionMeters));
    final String lookupTier = task.getContextValue(RealtimeIndexTask.CTX_KEY_LOOKUP_TIER);
    final LookupNodeService lookupNodeService = lookupTier == null ? toolbox.getLookupNodeService() : new LookupNodeService(lookupTier);
    final DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(toolbox.getDruidNode(), NodeRole.PEON, ImmutableMap.of(toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(), lookupNodeService.getName(), lookupNodeService));
    Throwable caughtExceptionOuter = null;
    // milliseconds waited for created segments to be handed off
    long handoffWaitMs = 0L;
    try (final RecordSupplier<PartitionIdType, SequenceOffsetType, RecordType> recordSupplier = task.newTaskRecordSupplier()) {
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().announce();
            toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        }
        appenderator = task.newAppenderator(toolbox, fireDepartmentMetrics, rowIngestionMeters, parseExceptionHandler);
        driver = task.newDriver(appenderator, toolbox, fireDepartmentMetrics);
        // Start up, set up initial sequences.
        final Object restoredMetadata = driver.startJob(segmentId -> {
            try {
                if (lockGranularityToUse == LockGranularity.SEGMENT) {
                    return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
                } else {
                    final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
                    if (lock == null) {
                        return false;
                    }
                    if (lock.isRevoked()) {
                        throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
                    }
                    return true;
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        if (restoredMetadata == null) {
            // no persist has happened so far
            // so either this is a brand new task or replacement of a failed task
            Preconditions.checkState(sequences.get(0).startOffsets.entrySet().stream().allMatch(partitionOffsetEntry -> createSequenceNumber(partitionOffsetEntry.getValue()).compareTo(createSequenceNumber(ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(partitionOffsetEntry.getKey()))) >= 0), "Sequence sequences are not compatible with start sequences of task");
            currOffsets.putAll(sequences.get(0).startOffsets);
        } else {
            @SuppressWarnings("unchecked") final Map<String, Object> restoredMetadataMap = (Map) restoredMetadata;
            final SeekableStreamEndSequenceNumbers<PartitionIdType, SequenceOffsetType> restoredNextPartitions = deserializePartitionsFromMetadata(toolbox.getJsonMapper(), restoredMetadataMap.get(METADATA_NEXT_PARTITIONS));
            currOffsets.putAll(restoredNextPartitions.getPartitionSequenceNumberMap());
            // Sanity checks.
            if (!restoredNextPartitions.getStream().equals(ioConfig.getStartSequenceNumbers().getStream())) {
                throw new ISE("Restored stream[%s] but expected stream[%s]", restoredNextPartitions.getStream(), ioConfig.getStartSequenceNumbers().getStream());
            }
            if (!currOffsets.keySet().equals(ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet())) {
                throw new ISE("Restored partitions[%s] but expected partitions[%s]", currOffsets.keySet(), ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet());
            }
            // The sequences size can be 0 only when all sequences got published and the task stopped before it could finish,
            // which is super rare
            if (sequences.size() == 0 || getLastSequenceMetadata().isCheckpointed()) {
                this.endOffsets.putAll(sequences.size() == 0 ? currOffsets : getLastSequenceMetadata().getEndOffsets());
            }
        }
        log.info("Initialized sequences: %s", sequences.stream().map(SequenceMetadata::toString).collect(Collectors.joining(", ")));
        // Filter out partitions with END_OF_SHARD markers since these partitions have already been fully read. This
        // should have been done by the supervisor already so this is defensive.
        int numPreFilterPartitions = currOffsets.size();
        if (currOffsets.entrySet().removeIf(x -> isEndOfShard(x.getValue()))) {
            log.info("Removed [%d] partitions from assignment which have already been closed.", numPreFilterPartitions - currOffsets.size());
        }
        // When end offsets are exclusive, we never skip the start record.
        if (!isEndOffsetExclusive()) {
            for (Map.Entry<PartitionIdType, SequenceOffsetType> entry : currOffsets.entrySet()) {
                final boolean isAtStart = entry.getValue().equals(ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(entry.getKey()));
                if (!isAtStart || ioConfig.getStartSequenceNumbers().getExclusivePartitions().contains(entry.getKey())) {
                    lastReadOffsets.put(entry.getKey(), entry.getValue());
                }
            }
        }
        // Set up committer.
        final Supplier<Committer> committerSupplier = () -> {
            final Map<PartitionIdType, SequenceOffsetType> snapshot = ImmutableMap.copyOf(currOffsets);
            lastPersistedOffsets.clear();
            lastPersistedOffsets.putAll(snapshot);
            return new Committer() {

                @Override
                public Object getMetadata() {
                    return ImmutableMap.of(METADATA_NEXT_PARTITIONS, new SeekableStreamEndSequenceNumbers<>(stream, snapshot));
                }

                @Override
                public void run() {
                // Do nothing.
                }
            };
        };
        // restart publishing of sequences (if any)
        maybePersistAndPublishSequences(committerSupplier);
        Set<StreamPartition<PartitionIdType>> assignment = assignPartitions(recordSupplier);
        possiblyResetDataSourceMetadata(toolbox, recordSupplier, assignment);
        seekToStartingSequence(recordSupplier, assignment);
        ingestionState = IngestionState.BUILD_SEGMENTS;
        // Main loop.
        // Could eventually support leader/follower mode (for keeping replicas more in sync)
        boolean stillReading = !assignment.isEmpty();
        status = Status.READING;
        Throwable caughtExceptionInner = null;
        try {
            while (stillReading) {
                if (possiblyPause()) {
                    // The partition assignments may have changed while paused by a call to setEndOffsets() so reassign
                    // partitions upon resuming. Don't call "seekToStartingSequence" after "assignPartitions", because there's
                    // no need to re-seek here. All we're going to be doing is dropping partitions.
                    assignment = assignPartitions(recordSupplier);
                    possiblyResetDataSourceMetadata(toolbox, recordSupplier, assignment);
                    if (assignment.isEmpty()) {
                        log.debug("All partitions have been fully read.");
                        publishOnStop.set(true);
                        stopRequested.set(true);
                    }
                }
                // if stop is requested or task's end sequence is set by call to setEndOffsets method with finish set to true
                if (stopRequested.get() || sequences.size() == 0 || getLastSequenceMetadata().isCheckpointed()) {
                    status = Status.PUBLISHING;
                }
                if (stopRequested.get()) {
                    break;
                }
                if (backgroundThreadException != null) {
                    throw new RuntimeException(backgroundThreadException);
                }
                checkPublishAndHandoffFailure();
                maybePersistAndPublishSequences(committerSupplier);
                // calling getRecord() ensures that exceptions specific to kafka/kinesis like OffsetOutOfRangeException
                // are handled in the subclasses.
                List<OrderedPartitionableRecord<PartitionIdType, SequenceOffsetType, RecordType>> records = getRecords(recordSupplier, toolbox);
                // note: getRecords() also updates assignment
                stillReading = !assignment.isEmpty();
                SequenceMetadata<PartitionIdType, SequenceOffsetType> sequenceToCheckpoint = null;
                for (OrderedPartitionableRecord<PartitionIdType, SequenceOffsetType, RecordType> record : records) {
                    final boolean shouldProcess = verifyRecordInRange(record.getPartitionId(), record.getSequenceNumber());
                    log.trace("Got stream[%s] partition[%s] sequenceNumber[%s], shouldProcess[%s].", record.getStream(), record.getPartitionId(), record.getSequenceNumber(), shouldProcess);
                    if (shouldProcess) {
                        final List<InputRow> rows = parser.parse(record.getData(), isEndOfShard(record.getSequenceNumber()));
                        boolean isPersistRequired = false;
                        final SequenceMetadata<PartitionIdType, SequenceOffsetType> sequenceToUse = sequences.stream().filter(sequenceMetadata -> sequenceMetadata.canHandle(this, record)).findFirst().orElse(null);
                        if (sequenceToUse == null) {
                            throw new ISE("Cannot find any valid sequence for record with partition [%s] and sequenceNumber [%s]. Current sequences: %s", record.getPartitionId(), record.getSequenceNumber(), sequences);
                        }
                        for (InputRow row : rows) {
                            final AppenderatorDriverAddResult addResult = driver.add(row, sequenceToUse.getSequenceName(), committerSupplier, true, // do not allow incremental persists to happen until all the rows from this batch of rows are indexed
                            false);
                            if (addResult.isOk()) {
                                // If the number of rows in the segment exceeds the threshold after adding a row,
                                // move the segment out from the active segments of BaseAppenderatorDriver to make a new segment.
                                final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                                if (isPushRequired && !sequenceToUse.isCheckpointed()) {
                                    sequenceToCheckpoint = sequenceToUse;
                                }
                                isPersistRequired |= addResult.isPersistRequired();
                            } else {
                                // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                                throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp());
                            }
                        }
                        if (isPersistRequired) {
                            Futures.addCallback(driver.persistAsync(committerSupplier.get()), new FutureCallback<Object>() {

                                @Override
                                public void onSuccess(@Nullable Object result) {
                                    log.debug("Persist completed with metadata: %s", result);
                                }

                                @Override
                                public void onFailure(Throwable t) {
                                    log.error("Persist failed, dying");
                                    backgroundThreadException = t;
                                }
                            });
                        }
                        // in kafka, we can easily get the next offset by adding 1, but for kinesis, there's no way
                        // to get the next sequence number without having to make an expensive api call. So the behavior
                        // here for kafka is to +1 while for kinesis we simply save the current sequence number
                        lastReadOffsets.put(record.getPartitionId(), record.getSequenceNumber());
                        currOffsets.put(record.getPartitionId(), getNextStartOffset(record.getSequenceNumber()));
                    }
                    // Use record.getSequenceNumber() in the moreToRead check, since currOffsets might not have been
                    // updated if we were skipping records for being beyond the end.
                    final boolean moreToReadAfterThisRecord = isMoreToReadAfterReadingRecord(record.getSequenceNumber(), endOffsets.get(record.getPartitionId()));
                    if (!moreToReadAfterThisRecord && assignment.remove(record.getStreamPartition())) {
                        log.info("Finished reading stream[%s], partition[%s].", record.getStream(), record.getPartitionId());
                        recordSupplier.assign(assignment);
                        stillReading = !assignment.isEmpty();
                    }
                }
                if (!stillReading) {
                    // We let the fireDepartmentMetrics know that all messages have been read. This way, some metrics such as
                    // high message gap need not be reported
                    fireDepartmentMetrics.markProcessingDone();
                }
                if (System.currentTimeMillis() > nextCheckpointTime) {
                    sequenceToCheckpoint = getLastSequenceMetadata();
                }
                if (sequenceToCheckpoint != null && stillReading) {
                    Preconditions.checkArgument(getLastSequenceMetadata().getSequenceName().equals(sequenceToCheckpoint.getSequenceName()), "Cannot checkpoint a sequence [%s] which is not the latest one, sequences %s", sequenceToCheckpoint, sequences);
                    requestPause();
                    final CheckPointDataSourceMetadataAction checkpointAction = new CheckPointDataSourceMetadataAction(task.getDataSource(), ioConfig.getTaskGroupId(), null, createDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(stream, sequenceToCheckpoint.getStartOffsets(), sequenceToCheckpoint.getExclusiveStartPartitions())));
                    if (!toolbox.getTaskActionClient().submit(checkpointAction)) {
                        throw new ISE("Checkpoint request with sequences [%s] failed, dying", currOffsets);
                    }
                }
            }
            ingestionState = IngestionState.COMPLETED;
        } catch (Exception e) {
            // (1) catch all exceptions while reading from kafka
            caughtExceptionInner = e;
            log.error(e, "Encountered exception in run() before persisting.");
            throw e;
        } finally {
            try {
                // persist pending data
                driver.persist(committerSupplier.get());
            } catch (Exception e) {
                if (caughtExceptionInner != null) {
                    caughtExceptionInner.addSuppressed(e);
                } else {
                    throw e;
                }
            }
        }
        synchronized (statusLock) {
            if (stopRequested.get() && !publishOnStop.get()) {
                throw new InterruptedException("Stopping without publishing");
            }
            status = Status.PUBLISHING;
        }
        // We need to copy sequences here, because the success callback in publishAndRegisterHandoff removes items from
        // the sequence list. If a publish finishes before we finish iterating through the sequence list, we can
        // end up skipping some sequences.
        List<SequenceMetadata<PartitionIdType, SequenceOffsetType>> sequencesSnapshot = new ArrayList<>(sequences);
        for (int i = 0; i < sequencesSnapshot.size(); i++) {
            final SequenceMetadata<PartitionIdType, SequenceOffsetType> sequenceMetadata = sequencesSnapshot.get(i);
            if (!publishingSequences.contains(sequenceMetadata.getSequenceName())) {
                final boolean isLast = i == (sequencesSnapshot.size() - 1);
                if (isLast) {
                    // Shorten endOffsets of the last sequence to match currOffsets.
                    sequenceMetadata.setEndOffsets(currOffsets);
                }
                // Update assignments of the sequence, which should clear them. (This will be checked later, when the
                // Committer is built.)
                sequenceMetadata.updateAssignments(currOffsets, this::isMoreToReadAfterReadingRecord);
                publishingSequences.add(sequenceMetadata.getSequenceName());
                // persist already done in finally, so directly add to publishQueue
                publishAndRegisterHandoff(sequenceMetadata);
            }
        }
        if (backgroundThreadException != null) {
            throw new RuntimeException(backgroundThreadException);
        }
        // Wait for publish futures to complete.
        Futures.allAsList(publishWaitList).get();
        // Wait for handoff futures to complete.
        // Note that every publishing task (created by calling AppenderatorDriver.publish()) has a corresponding
        // handoffFuture. handoffFuture can throw an exception if 1) the corresponding publishFuture failed or 2) it
        // failed to persist sequences. It might also return null if handoff failed, but was recoverable.
        // See publishAndRegisterHandoff() for details.
        List<SegmentsAndCommitMetadata> handedOffList = Collections.emptyList();
        if (tuningConfig.getHandoffConditionTimeout() == 0) {
            handedOffList = Futures.allAsList(handOffWaitList).get();
        } else {
            final long start = System.nanoTime();
            try {
                handedOffList = Futures.allAsList(handOffWaitList).get(tuningConfig.getHandoffConditionTimeout(), TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                // Handoff timeout is not an indexing failure, but a coordination failure. We simply ignore the timeout
                // exception here.
                log.makeAlert("Timeout waiting for handoff").addData("taskId", task.getId()).addData("handoffConditionTimeout", tuningConfig.getHandoffConditionTimeout()).emit();
            } finally {
                handoffWaitMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
            }
        }
        for (SegmentsAndCommitMetadata handedOff : handedOffList) {
            log.info("Handoff complete for segments: %s", String.join(", ", Lists.transform(handedOff.getSegments(), DataSegment::toString)));
        }
        appenderator.close();
    } catch (InterruptedException | RejectedExecutionException e) {
        // (2) catch InterruptedException and RejectedExecutionException thrown for the whole ingestion steps including
        // the final publishing.
        caughtExceptionOuter = e;
        try {
            Futures.allAsList(publishWaitList).cancel(true);
            Futures.allAsList(handOffWaitList).cancel(true);
            if (appenderator != null) {
                appenderator.closeNow();
            }
        } catch (Exception e2) {
            e.addSuppressed(e2);
        }
        // handle the InterruptedException that gets wrapped in a RejectedExecutionException
        if (e instanceof RejectedExecutionException && (e.getCause() == null || !(e.getCause() instanceof InterruptedException))) {
            throw e;
        }
        // if we were interrupted because we were asked to stop, handle the exception and return success, else rethrow
        if (!stopRequested.get()) {
            Thread.currentThread().interrupt();
            throw e;
        }
    } catch (Exception e) {
        // (3) catch all other exceptions thrown for the whole ingestion steps including the final publishing.
        caughtExceptionOuter = e;
        try {
            Futures.allAsList(publishWaitList).cancel(true);
            Futures.allAsList(handOffWaitList).cancel(true);
            if (appenderator != null) {
                appenderator.closeNow();
            }
        } catch (Exception e2) {
            e.addSuppressed(e2);
        }
        throw e;
    } finally {
        try {
            if (driver != null) {
                driver.close();
            }
            toolbox.getChatHandlerProvider().unregister(task.getId());
            if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
                toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
                toolbox.getDataSegmentServerAnnouncer().unannounce();
            }
        } catch (Throwable e) {
            if (caughtExceptionOuter != null) {
                caughtExceptionOuter.addSuppressed(e);
            } else {
                throw e;
            }
        }
    }
    toolbox.getTaskReportFileWriter().write(task.getId(), getTaskCompletionReports(null, handoffWaitMs));
    return TaskStatus.success(task.getId());
}
Also used : TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) LookupNodeService(org.apache.druid.discovery.LookupNodeService) Produces(javax.ws.rs.Produces) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) IngestionState(org.apache.druid.indexer.IngestionState) MediaType(javax.ws.rs.core.MediaType) Future(java.util.concurrent.Future) Map(java.util.Map) InputFormat(org.apache.druid.data.input.InputFormat) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) NotNull(javax.validation.constraints.NotNull) TaskRealtimeMetricsMonitorBuilder(org.apache.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder) InputRow(org.apache.druid.data.input.InputRow) IndexTaskUtils(org.apache.druid.indexing.common.task.IndexTaskUtils) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) GET(javax.ws.rs.GET) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) InputRowSchema(org.apache.druid.data.input.InputRowSchema) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) HttpServletRequest(javax.servlet.http.HttpServletRequest) Lists(com.google.common.collect.Lists) CheckPointDataSourceMetadataAction(org.apache.druid.indexing.common.actions.CheckPointDataSourceMetadataAction) Nullable(javax.annotation.Nullable) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) FutureCallback(com.google.common.util.concurrent.FutureCallback) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Futures(com.google.common.util.concurrent.Futures) Lock(java.util.concurrent.locks.Lock) TreeMap(java.util.TreeMap) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) TimeoutException(java.util.concurrent.TimeoutException) ResetDataSourceMetadataAction(org.apache.druid.indexing.common.actions.ResetDataSourceMetadataAction) SettableFuture(com.google.common.util.concurrent.SettableFuture) SeekableStreamSupervisor(org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor) ByteBuffer(java.nio.ByteBuffer) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) QueryParam(javax.ws.rs.QueryParam) Consumes(javax.ws.rs.Consumes) TaskLock(org.apache.druid.indexing.common.TaskLock) DefaultValue(javax.ws.rs.DefaultValue) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) TypeReference(com.fasterxml.jackson.core.type.TypeReference) DateTimes(org.apache.druid.java.util.common.DateTimes) Function(com.google.common.base.Function) 
ImmutableSet(com.google.common.collect.ImmutableSet) Context(javax.ws.rs.core.Context) ImmutableMap(com.google.common.collect.ImmutableMap) InputRowSchemas(org.apache.druid.indexing.input.InputRowSchemas) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Action(org.apache.druid.server.security.Action) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) LockGranularity(org.apache.druid.indexing.common.LockGranularity) OrderedSequenceNumber(org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber) List(java.util.List) Response(javax.ws.rs.core.Response) TaskLockType(org.apache.druid.indexing.common.TaskLockType) DataSegment(org.apache.druid.timeline.DataSegment) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) NodeRole(org.apache.druid.discovery.NodeRole) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) Supplier(com.google.common.base.Supplier) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CollectionUtils(org.apache.druid.utils.CollectionUtils) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) ConcurrentMap(java.util.concurrent.ConcurrentMap) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) HashSet(java.util.HashSet) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) Access(org.apache.druid.server.security.Access) POST(javax.ws.rs.POST) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Iterator(java.util.Iterator) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TimeUnit(java.util.concurrent.TimeUnit) Condition(java.util.concurrent.locks.Condition) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections)
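
The pattern in runInternal is the canonical one: the Supplier<Committer> snapshots the in-flight offsets at the moment get() is called, so the appenderator commits exactly the offsets that produced the data it persists. The contract itself is tiny: Committer is essentially a Runnable with an extra getMetadata() method. Below is a minimal sketch of the same idea outside Druid's task machinery; the OffsetSnapshotCommitters class name and the plain Integer-to-Long offset map are illustrative, not part of Druid's API.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableMap;

import org.apache.druid.data.input.Committer;

public class OffsetSnapshotCommitters {

    // Mutable offsets, advanced by the reading loop as records are consumed.
    private final ConcurrentHashMap<Integer, Long> currOffsets = new ConcurrentHashMap<>();

    public void advance(int partition, long offset) {
        currOffsets.put(partition, offset);
    }

    // Each get() captures an immutable snapshot; the returned Committer commits
    // these offsets, not whatever currOffsets holds later at persist time.
    public Supplier<Committer> committerSupplier() {
        return () -> {
            final Map<Integer, Long> snapshot = ImmutableMap.copyOf(currOffsets);
            return new Committer() {
                @Override
                public Object getMetadata() {
                    return snapshot;
                }

                @Override
                public void run() {
                    // Invoked once the associated persist has completed durably.
                }
            };
        };
    }
}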

Example 17 with Committer

Use of org.apache.druid.data.input.Committer in project druid by druid-io.

From the class RealtimePlumberSchoolTest, method testPersist:

private void testPersist(final Object commitMetadata) throws Exception {
    Sink sink = new Sink(Intervals.utc(0, TimeUnit.HOURS.toMillis(1)), schema, tuningConfig.getShardSpec(), DateTimes.of("2014-12-01T12:34:56.789").toString(), tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), tuningConfig.getMaxBytesInMemoryOrDefault(), true, tuningConfig.getDedupColumn());
    plumber.getSinks().put(0L, sink);
    Assert.assertNull(plumber.startJob());
    final InputRow row = EasyMock.createNiceMock(InputRow.class);
    EasyMock.expect(row.getTimestampFromEpoch()).andReturn(0L);
    EasyMock.expect(row.getDimensions()).andReturn(new ArrayList<String>());
    EasyMock.replay(row);
    final CountDownLatch doneSignal = new CountDownLatch(1);
    final Committer committer = new Committer() {

        @Override
        public Object getMetadata() {
            return commitMetadata;
        }

        @Override
        public void run() {
            doneSignal.countDown();
        }
    };
    plumber.add(row, Suppliers.ofInstance(committer));
    plumber.persist(committer);
    doneSignal.await();
    plumber.getSinks().clear();
    plumber.finishJob();
}
Also used : InputRow(org.apache.druid.data.input.InputRow) Committer(org.apache.druid.data.input.Committer) CountDownLatch(java.util.concurrent.CountDownLatch)
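
Committer.run() fires asynchronously, after the plumber's persist completes on a background thread, so the test must block on the latch before asserting or tearing down. The inline anonymous class above factors out naturally into a small reusable helper; here is a sketch of one (AwaitableCommitter is a hypothetical test helper, not a Druid class).

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import org.apache.druid.data.input.Committer;

public class AwaitableCommitter implements Committer {

    private final Object metadata;
    private final CountDownLatch committed = new CountDownLatch(1);

    public AwaitableCommitter(Object metadata) {
        this.metadata = metadata;
    }

    @Override
    public Object getMetadata() {
        return metadata;
    }

    @Override
    public void run() {
        // Called by the persisting component once the commit is durable.
        committed.countDown();
    }

    // Blocks the caller until run() has fired, or gives up after the timeout.
    public boolean awaitCommit(long timeout, TimeUnit unit) throws InterruptedException {
        return committed.await(timeout, unit);
    }
}

A test would pass this committer to persist(...) and then call awaitCommit(30, TimeUnit.SECONDS) where the code above calls doneSignal.await().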

Example 18 with Committer

Use of org.apache.druid.data.input.Committer in project druid by druid-io.

From the class StreamAppenderatorTest, method testTaskCleanupInMemoryCounterAfterCloseWithRowInMemory:

@Test
public void testTaskCleanupInMemoryCounterAfterCloseWithRowInMemory() throws Exception {
    try (final StreamAppenderatorTester tester = new StreamAppenderatorTester(100, 10000, true)) {
        final Appenderator appenderator = tester.getAppenderator();
        final AtomicInteger eventCount = new AtomicInteger(0);
        final Supplier<Committer> committerSupplier = () -> {
            final Object metadata = ImmutableMap.of("eventCount", eventCount.get());
            return new Committer() {

                @Override
                public Object getMetadata() {
                    return metadata;
                }

                @Override
                public void run() {
                // Do nothing
                }
            };
        };
        appenderator.startJob();
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "foo", 1), committerSupplier);
        // Still under maxSizeInBytes after the add. Hence, we do not persist yet
        int nullHandlingOverhead = NullHandling.sqlCompatible() ? 1 : 0;
        int currentInMemoryIndexSize = 182 + nullHandlingOverhead;
        int sinkSizeOverhead = 1 * StreamAppenderator.ROUGH_OVERHEAD_PER_SINK;
        Assert.assertEquals(currentInMemoryIndexSize + sinkSizeOverhead, ((StreamAppenderator) appenderator).getBytesCurrentlyInMemory());
        // Close with row still in memory (no persist)
        appenderator.close();
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getRowsInMemory());
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getBytesCurrentlyInMemory());
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Committer(org.apache.druid.data.input.Committer) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 19 with Committer

Use of org.apache.druid.data.input.Committer in project druid by druid-io.

From the class StreamAppenderatorTest, method testMaxRowsInMemory:

@Test
public void testMaxRowsInMemory() throws Exception {
    try (final StreamAppenderatorTester tester = new StreamAppenderatorTester(3, true)) {
        final Appenderator appenderator = tester.getAppenderator();
        final AtomicInteger eventCount = new AtomicInteger(0);
        final Supplier<Committer> committerSupplier = new Supplier<Committer>() {

            @Override
            public Committer get() {
                final Object metadata = ImmutableMap.of("eventCount", eventCount.get());
                return new Committer() {

                    @Override
                    public Object getMetadata() {
                        return metadata;
                    }

                    @Override
                    public void run() {
                    // Do nothing
                    }
                };
            }
        };
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.startJob();
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "foo", 1), committerSupplier);
        Assert.assertEquals(1, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(1), ir("2000", "bar", 1), committerSupplier);
        Assert.assertEquals(2, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(1), ir("2000", "bar", 1), committerSupplier);
        Assert.assertEquals(2, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "baz", 1), committerSupplier);
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(1), ir("2000", "qux", 1), committerSupplier);
        Assert.assertEquals(1, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "bob", 1), committerSupplier);
        Assert.assertEquals(2, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.persistAll(committerSupplier.get());
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.close();
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Supplier(com.google.common.base.Supplier) Committer(org.apache.druid.data.input.Committer) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
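
The assertion sequence here is worth tracing: the tester is constructed with maxRowsInMemory = 3, identical rows roll up rather than counting twice, and hitting the threshold triggers an incremental persist that resets the counter. A condensed trace of the adds above (the rollup and threshold behavior is read off the assertions; the constructor argument is taken from the test):

// maxRowsInMemory = 3 (first argument to StreamAppenderatorTester)
// add "foo" to sink 0 -> 1 row in memory
// add "bar" to sink 1 -> 2 rows in memory
// add "bar" to sink 1 -> still 2 (identical row rolls up into the existing one)
// add "baz" to sink 0 -> threshold reached, incremental persist -> 0 rows in memory
// add "qux" to sink 1 -> 1
// add "bob" to sink 0 -> 2
// persistAll(committerSupplier.get()) -> 0, committing {"eventCount": ...} as metadata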

Example 20 with Committer

Use of org.apache.druid.data.input.Committer in project druid by druid-io.

From the class StreamAppenderatorTest, method testMaxBytesInMemoryInMultipleSinks:

@Test
public void testMaxBytesInMemoryInMultipleSinks() throws Exception {
    try (final StreamAppenderatorTester tester = new StreamAppenderatorTester(100, 31100, true)) {
        final Appenderator appenderator = tester.getAppenderator();
        final AtomicInteger eventCount = new AtomicInteger(0);
        final Supplier<Committer> committerSupplier = () -> {
            final Object metadata = ImmutableMap.of("eventCount", eventCount.get());
            return new Committer() {

                @Override
                public Object getMetadata() {
                    return metadata;
                }

                @Override
                public void run() {
                // Do nothing
                }
            };
        };
        appenderator.startJob();
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "foo", 1), committerSupplier);
        appenderator.add(IDENTIFIERS.get(1), ir("2000", "bar", 1), committerSupplier);
        // Still under maxSizeInBytes after the add. Hence, we do not persist yet
        // expectedSizeInBytes = 44 (map overhead) + 28 (TimeAndDims overhead) + 56 (aggregator metrics) + 54 (dimsKeySize) = 182, plus 1 byte when null handling is enabled
        int nullHandlingOverhead = NullHandling.sqlCompatible() ? 1 : 0;
        int currentInMemoryIndexSize = 182 + nullHandlingOverhead;
        int sinkSizeOverhead = 2 * StreamAppenderator.ROUGH_OVERHEAD_PER_SINK;
        // currHydrant in the sink still has > 0 bytesInMemory since we do not persist yet
        Assert.assertEquals(currentInMemoryIndexSize, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(0)));
        Assert.assertEquals(currentInMemoryIndexSize, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(1)));
        Assert.assertEquals((2 * currentInMemoryIndexSize) + sinkSizeOverhead, ((StreamAppenderator) appenderator).getBytesCurrentlyInMemory());
        // We do multiple more adds to the same sink to cause a persist.
        for (int i = 0; i < 49; i++) {
            appenderator.add(IDENTIFIERS.get(0), ir("2000", "bar_" + i, 1), committerSupplier);
            appenderator.add(IDENTIFIERS.get(1), ir("2000", "bar_" + i, 1), committerSupplier);
        }
        sinkSizeOverhead = 2 * StreamAppenderator.ROUGH_OVERHEAD_PER_SINK;
        // currHydrant size is 0 since we just persisted all indexes to disk.
        currentInMemoryIndexSize = 0;
        // We are now over maxSizeInBytes after the add. Hence, we do a persist.
        // currHydrant in the sink has 0 bytesInMemory since we just did a persist
        Assert.assertEquals(currentInMemoryIndexSize, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(0)));
        Assert.assertEquals(currentInMemoryIndexSize, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(1)));
        // Mapped index size is the memory still needed after we persisted indexes. Note that the segments have
        // 1 dimension column, 2 metric columns, and 1 time column.
        int mappedIndexSize = 2 * (1012 + (2 * StreamAppenderator.ROUGH_OVERHEAD_PER_METRIC_COLUMN_HOLDER) + StreamAppenderator.ROUGH_OVERHEAD_PER_DIMENSION_COLUMN_HOLDER + StreamAppenderator.ROUGH_OVERHEAD_PER_TIME_COLUMN_HOLDER);
        Assert.assertEquals(currentInMemoryIndexSize + sinkSizeOverhead + mappedIndexSize, ((StreamAppenderator) appenderator).getBytesCurrentlyInMemory());
        // Add a single row after persisted to sink 0
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "bob", 1), committerSupplier);
        // currHydrant in the sink still has > 0 bytesInMemory since we do not persist yet
        currentInMemoryIndexSize = 182 + nullHandlingOverhead;
        Assert.assertEquals(currentInMemoryIndexSize, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(0)));
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(1)));
        Assert.assertEquals(currentInMemoryIndexSize + sinkSizeOverhead + mappedIndexSize, ((StreamAppenderator) appenderator).getBytesCurrentlyInMemory());
        // Now add a single row to sink 1
        appenderator.add(IDENTIFIERS.get(1), ir("2000", "bob", 1), committerSupplier);
        Assert.assertEquals(currentInMemoryIndexSize, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(0)));
        Assert.assertEquals(currentInMemoryIndexSize, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(1)));
        Assert.assertEquals((2 * currentInMemoryIndexSize) + sinkSizeOverhead + mappedIndexSize, ((StreamAppenderator) appenderator).getBytesCurrentlyInMemory());
        // We do multiple more adds to both sinks to cause a persist.
        for (int i = 0; i < 34; i++) {
            appenderator.add(IDENTIFIERS.get(0), ir("2000", "bar_" + i, 1), committerSupplier);
            appenderator.add(IDENTIFIERS.get(1), ir("2000", "bar_" + i, 1), committerSupplier);
        }
        // currHydrant size is 0 since we just persisted all indexes to disk.
        currentInMemoryIndexSize = 0;
        // We are now over maxSizeInBytes after the add. Hence, we do a persist.
        // currHydrant in the sink has 0 bytesInMemory since we just did a persist
        Assert.assertEquals(currentInMemoryIndexSize, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(0)));
        Assert.assertEquals(currentInMemoryIndexSize, ((StreamAppenderator) appenderator).getBytesInMemory(IDENTIFIERS.get(1)));
        // Mapped index size is the memory still needed after we persisted indexes. Note that the segments have
        // 1 dimension column, 2 metric columns, and 1 time column. However, we now have two indexes from the two
        // previous persists.
        mappedIndexSize = 2 * (2 * (1012 + (2 * StreamAppenderator.ROUGH_OVERHEAD_PER_METRIC_COLUMN_HOLDER) + StreamAppenderator.ROUGH_OVERHEAD_PER_DIMENSION_COLUMN_HOLDER + StreamAppenderator.ROUGH_OVERHEAD_PER_TIME_COLUMN_HOLDER));
        Assert.assertEquals(currentInMemoryIndexSize + sinkSizeOverhead + mappedIndexSize, ((StreamAppenderator) appenderator).getBytesCurrentlyInMemory());
        appenderator.close();
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getRowsInMemory());
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getBytesCurrentlyInMemory());
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Committer(org.apache.druid.data.input.Committer) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
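
The arithmetic behind these assertions follows the formula spelled out in the test's own comments: each sink with unpersisted rows contributes an in-memory index (182 bytes, plus 1 under SQL-compatible null handling), every sink adds a fixed rough overhead, and each persisted hydrant leaves behind a mapped-index footprint. Below is a standalone restatement of that bookkeeping; the helper is illustrative and assumes all sinks are in the same state (the test's intermediate assertions sum sinks individually).

// 182 = 44 (map overhead) + 28 (TimeAndDims overhead) + 56 (aggregator metrics) + 54 (dims key size)
static long expectedBytesCurrentlyInMemory(
    int unpersistedIndexBytesPerSink, // 182 + nullHandlingOverhead, or 0 right after a persist
    int numSinks,
    int persistedHydrantsPerSink      // 0 before the first persist
) {
    long inMemoryIndexes = (long) numSinks * unpersistedIndexBytesPerSink;
    long sinkOverhead = (long) numSinks * StreamAppenderator.ROUGH_OVERHEAD_PER_SINK;
    long mappedIndexes = (long) numSinks * persistedHydrantsPerSink
        * (1012
           + 2 * StreamAppenderator.ROUGH_OVERHEAD_PER_METRIC_COLUMN_HOLDER
           + StreamAppenderator.ROUGH_OVERHEAD_PER_DIMENSION_COLUMN_HOLDER
           + StreamAppenderator.ROUGH_OVERHEAD_PER_TIME_COLUMN_HOLDER);
    return inMemoryIndexes + sinkOverhead + mappedIndexes;
}

Plugging in the final pre-close state above (0 unpersisted bytes, 2 sinks, 2 persisted hydrants each) reproduces the last getBytesCurrentlyInMemory() expectation.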

Aggregations

Committer (org.apache.druid.data.input.Committer)22 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)13 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)11 Test (org.junit.Test)11 Supplier (com.google.common.base.Supplier)8 IOException (java.io.IOException)8 List (java.util.List)7 ISE (org.apache.druid.java.util.common.ISE)7 VisibleForTesting (com.google.common.annotations.VisibleForTesting)6 Preconditions (com.google.common.base.Preconditions)6 Futures (com.google.common.util.concurrent.Futures)6 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)6 File (java.io.File)6 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)6 Map (java.util.Map)6 CountDownLatch (java.util.concurrent.CountDownLatch)6 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 Function (com.google.common.base.Function)5 Lists (com.google.common.collect.Lists)5