Search in sources :

Example 31 with FireHydrant

use of org.apache.druid.segment.realtime.FireHydrant in project druid by druid-io.

the class StreamAppenderator method abandonSegment.

/**
 * Stops writes to the given sink, removes its contribution from the row/byte counters, and schedules the
 * actual removal of the sink on {@code persistExecutor} once {@code pushBarrier()} completes.
 *
 * @param identifier       identifier of the segment being abandoned
 * @param sink             sink currently associated with {@code identifier}
 * @param removeOnDiskData if true, the segment is also removed from the commit metadata and its persist
 *                         directory is deleted
 *
 * @return future that completes (with null) when the abandon step has run
 */
private ListenableFuture<?> abandonSegment(final SegmentIdWithShardSpec identifier, final Sink sink, final boolean removeOnDiskData) {
    // Ensure no future writes will be made to this sink.
    if (sink.finishWriting()) {
        // Decrement this sink's rows from the counters. we only count active sinks so that we don't double decrement,
        // i.e. those that haven't been persisted for *InMemory counters, or pushed to deep storage for the total counter.
        rowsCurrentlyInMemory.addAndGet(-sink.getNumRowsInMemory());
        bytesCurrentlyInMemory.addAndGet(-sink.getBytesInMemory());
        bytesCurrentlyInMemory.addAndGet(-calculateSinkMemoryInUsed(sink));
        for (FireHydrant hydrant : sink) {
            // Decrement memory used by all Memory Mapped Hydrant
            if (!hydrant.equals(sink.getCurrHydrant())) {
                bytesCurrentlyInMemory.addAndGet(-calculateMMappedHydrantMemoryInUsed(hydrant));
            }
        }
        totalRows.addAndGet(-sink.getNumRows());
    }
    // Mark this identifier as dropping, so no future push tasks will pick it up.
    droppingSinks.add(identifier);
    // Wait for any outstanding pushes to finish, then abandon the segment inside the persist thread.
    return Futures.transform(pushBarrier(), new Function<Object, Void>() {

        @Nullable
        @Override
        public Void apply(@Nullable Object input) {
            if (!sinks.remove(identifier, sink)) {
                log.error("Sink for segment[%s] no longer valid, not abandoning.", identifier);
                return null;
            }
            metrics.setSinkCount(sinks.size());
            if (removeOnDiskData) {
                // Remove this segment from the committed list. This must be done from the persist thread.
                log.debug("Removing commit metadata for segment[%s].", identifier);
                // Acquire the lock BEFORE entering the try block: if lock() itself fails, the finally block must
                // not call unlock() on a lock this thread never acquired.
                commitLock.lock();
                try {
                    final Committed oldCommit = readCommit();
                    if (oldCommit != null) {
                        writeCommit(oldCommit.without(identifier.toString()));
                    }
                } catch (Exception e) {
                    log.makeAlert(e, "Failed to update committed segments[%s]", schema.getDataSource()).addData("identifier", identifier.toString()).emit();
                    throw new RuntimeException(e);
                } finally {
                    commitLock.unlock();
                }
            }
            // Unannounce the segment. Failure here is alerted but deliberately does not abort the drop.
            try {
                segmentAnnouncer.unannounceSegment(sink.getSegment());
            } catch (Exception e) {
                log.makeAlert(e, "Failed to unannounce segment[%s]", schema.getDataSource()).addData("identifier", identifier.toString()).emit();
            }
            droppingSinks.remove(identifier);
            sinkTimeline.remove(sink.getInterval(), sink.getVersion(), identifier.getShardSpec().createChunk(sink));
            for (FireHydrant hydrant : sink) {
                if (cache != null) {
                    cache.close(SinkQuerySegmentWalker.makeHydrantCacheIdentifier(hydrant));
                }
                hydrant.swapSegment(null);
            }
            if (removeOnDiskData) {
                removeDirectory(computePersistDir(identifier));
            }
            log.info("Dropped segment[%s].", identifier);
            return null;
        }
    }, // NOTE(review): presumably run on persistExecutor so pending persists finish before abandoning - confirm
    persistExecutor);
}
Also used : FireHydrant(org.apache.druid.segment.realtime.FireHydrant) Nullable(javax.annotation.Nullable) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 32 with FireHydrant

use of org.apache.druid.segment.realtime.FireHydrant in project druid by apache.

the class StreamAppenderator method mergeAndPush.

/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
 * be run in the single-threaded pushExecutor.
 *
 * If a descriptor file already exists for the segment and {@code useUniquePath} is false, the previously written
 * descriptor is read back and returned without merging or pushing again.
 *
 * @param identifier    sink identifier
 * @param sink          sink to push
 * @param useUniquePath true if the segment should be written to a path with a unique identifier
 *
 * @return segment descriptor, or null if the sink is no longer valid
 *
 * @throws ISE              if the sink is still writable or has a hydrant that has not been swapped
 * @throws RuntimeException wrapping any exception raised while merging or pushing
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink, final boolean useUniquePath) {
    // noinspection ObjectEquality
    if (sinks.get(identifier) != sink) {
        log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
        return null;
    }
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks. The writability check does not depend on the hydrant, so it is done once up front
    // rather than once per hydrant.
    if (sink.isWritable()) {
        throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
    }
    for (FireHydrant hydrant : sink) {
        synchronized (hydrant) {
            if (!hydrant.hasSwapped()) {
                throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
            }
        }
    }
    try {
        if (descriptorFile.exists()) {
            if (useUniquePath) {
                // Don't reuse the descriptor, because the caller asked for a unique path. Leave the old one as-is, since
                // it might serve some unknown purpose.
                log.debug("Segment[%s] already pushed, but we want a unique path, so will push again with a new path.", identifier);
            } else {
                log.info("Segment[%s] already pushed, skipping.", identifier);
                return objectMapper.readValue(descriptorFile, DataSegment.class);
            }
        }
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        final File mergedFile;
        final long mergeFinishTime;
        final long startTime = System.nanoTime();
        List<QueryableIndex> indexes = new ArrayList<>();
        Closer closer = Closer.create();
        try {
            for (FireHydrant fireHydrant : sink) {
                Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                // Register the releaser immediately so the acquired reference is released even if any of the
                // following statements throws.
                closer.register(segmentAndCloseable.rhs);
                final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
                indexes.add(queryableIndex);
            }
            mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), schema.getDimensionsSpec(), mergedTarget, tuningConfig.getIndexSpec(), tuningConfig.getIndexSpecForIntermediatePersists(), new BaseProgressIndicator(), tuningConfig.getSegmentWriteOutMediumFactory(), tuningConfig.getMaxColumnsToMerge());
            mergeFinishTime = System.nanoTime();
            log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
        final DataSegment segmentToPush = sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec()));
        // Retry pushing segments because uploading to deep storage might fail especially for cloud storage types
        final DataSegment segment = RetryUtils.retry(
            () -> dataSegmentPusher.push(mergedFile, segmentToPush, useUniquePath),
            exception -> exception instanceof Exception,
            5
        );
        final long pushFinishTime = System.nanoTime();
        // Writing the descriptor marks the push as completed (see the descriptorFile.exists() check above).
        objectMapper.writeValue(descriptorFile, segment);
        log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw new RuntimeException(e);
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Nullable(javax.annotation.Nullable)

Example 33 with FireHydrant

use of org.apache.druid.segment.realtime.FireHydrant in project druid by apache.

the class AppenderatorImpl method abandonSegment.

/**
 * Stops writes to the given sink, removes its contribution from the row/byte counters, and schedules the
 * actual removal of the sink on {@code persistExecutor} once {@code pushBarrier()} completes.
 *
 * @param identifier       identifier of the segment being abandoned
 * @param sink             sink currently associated with {@code identifier}
 * @param removeOnDiskData if true, the segment is also removed from the commit metadata and its persist
 *                         directory is deleted
 *
 * @return future that completes (with null) when the abandon step has run
 */
private ListenableFuture<?> abandonSegment(final SegmentIdWithShardSpec identifier, final Sink sink, final boolean removeOnDiskData) {
    // Ensure no future writes will be made to this sink.
    if (sink.finishWriting()) {
        // Decrement this sink's rows from the counters. we only count active sinks so that we don't double decrement,
        // i.e. those that haven't been persisted for *InMemory counters, or pushed to deep storage for the total counter.
        rowsCurrentlyInMemory.addAndGet(-sink.getNumRowsInMemory());
        bytesCurrentlyInMemory.addAndGet(-sink.getBytesInMemory());
        bytesCurrentlyInMemory.addAndGet(-calculateSinkMemoryInUsed(sink));
        for (FireHydrant hydrant : sink) {
            // Decrement memory used by all Memory Mapped Hydrant
            if (!hydrant.equals(sink.getCurrHydrant())) {
                bytesCurrentlyInMemory.addAndGet(-calculateMMappedHydrantMemoryInUsed(hydrant));
            }
        }
        totalRows.addAndGet(-sink.getNumRows());
    }
    // Mark this identifier as dropping, so no future push tasks will pick it up.
    droppingSinks.add(identifier);
    // Wait for any outstanding pushes to finish, then abandon the segment inside the persist thread.
    return Futures.transform(pushBarrier(), new Function<Object, Void>() {

        @Nullable
        @Override
        public Void apply(@Nullable Object input) {
            if (!sinks.remove(identifier, sink)) {
                log.error("Sink for segment[%s] no longer valid, not abandoning.", identifier);
                return null;
            }
            metrics.setSinkCount(sinks.size());
            if (removeOnDiskData) {
                // Remove this segment from the committed list. This must be done from the persist thread.
                log.debug("Removing commit metadata for segment[%s].", identifier);
                // Acquire the lock BEFORE entering the try block: if lock() itself fails, the finally block must
                // not call unlock() on a lock this thread never acquired.
                commitLock.lock();
                try {
                    final Committed oldCommit = readCommit();
                    if (oldCommit != null) {
                        writeCommit(oldCommit.without(identifier.toString()));
                    }
                } catch (Exception e) {
                    log.makeAlert(e, "Failed to update committed segments[%s]", schema.getDataSource()).addData("identifier", identifier.toString()).emit();
                    throw new RuntimeException(e);
                } finally {
                    commitLock.unlock();
                }
            }
            // Unannounce the segment. Failure here is alerted but deliberately does not abort the drop.
            try {
                segmentAnnouncer.unannounceSegment(sink.getSegment());
            } catch (Exception e) {
                log.makeAlert(e, "Failed to unannounce segment[%s]", schema.getDataSource()).addData("identifier", identifier.toString()).emit();
            }
            droppingSinks.remove(identifier);
            sinkTimeline.remove(sink.getInterval(), sink.getVersion(), identifier.getShardSpec().createChunk(sink));
            for (FireHydrant hydrant : sink) {
                if (cache != null) {
                    cache.close(SinkQuerySegmentWalker.makeHydrantCacheIdentifier(hydrant));
                }
                hydrant.swapSegment(null);
                // remove hydrant from persisted metadata:
                persistedHydrantMetadata.remove(hydrant);
            }
            if (removeOnDiskData) {
                removeDirectory(computePersistDir(identifier));
            }
            log.info("Dropped segment[%s].", identifier);
            return null;
        }
    }, // NOTE(review): presumably run on persistExecutor so pending persists finish before abandoning - confirm
    persistExecutor);
}
Also used : FireHydrant(org.apache.druid.segment.realtime.FireHydrant) Nullable(javax.annotation.Nullable) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 34 with FireHydrant

use of org.apache.druid.segment.realtime.FireHydrant in project druid by apache.

the class AppenderatorImpl method persistAll.

/**
 * Collects every hydrant that still needs persisting across all sinks, swaps out swappable sinks, and submits
 * a task to {@code persistExecutor} that persists those hydrants and, when a committer is supplied, runs it and
 * merges the current hydrant counts into the commit metadata.
 *
 * The in-memory row/byte counters are decremented on the calling thread right after submission, before the
 * persist task has necessarily finished.
 *
 * @param committer committer to run after persisting, or null to persist without committing
 *
 * @return future resolving to the committer's metadata, or to null if {@code committer} is null
 */
@Override
public ListenableFuture<Object> persistAll(@Nullable final Committer committer) {
    throwPersistErrorIfExists();
    final Map<String, Integer> currentHydrants = new HashMap<>();
    final List<Pair<FireHydrant, SegmentIdWithShardSpec>> indexesToPersist = new ArrayList<>();
    int numPersistedRows = 0;
    long bytesPersisted = 0L;
    MutableLong totalHydrantsCount = new MutableLong();
    MutableLong totalHydrantsPersisted = new MutableLong();
    final long totalSinks = sinks.size();
    for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        final SegmentIdWithShardSpec identifier = entry.getKey();
        final Sink sink = entry.getValue();
        if (sink == null) {
            throw new ISE("No sink for identifier: %s", identifier);
        }
        final List<FireHydrant> hydrants = Lists.newArrayList(sink);
        totalHydrantsCount.add(hydrants.size());
        currentHydrants.put(identifier.toString(), hydrants.size());
        numPersistedRows += sink.getNumRowsInMemory();
        bytesPersisted += sink.getBytesInMemory();
        // For a writable sink the last hydrant is excluded here; it is handled by the swappable() branch below.
        final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();
        // gather hydrants that have not been persisted:
        for (FireHydrant hydrant : hydrants.subList(0, limit)) {
            if (!hydrant.hasSwapped()) {
                log.debug("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
                indexesToPersist.add(Pair.of(hydrant, identifier));
                totalHydrantsPersisted.add(1);
            }
        }
        if (sink.swappable()) {
            // It is swappable. Get the old one to persist it and create a new one:
            indexesToPersist.add(Pair.of(sink.swap(), identifier));
            totalHydrantsPersisted.add(1);
        }
    }
    log.debug("Submitting persist runnable for dataSource[%s]", schema.getDataSource());
    final Object commitMetadata = committer == null ? null : committer.getMetadata();
    final Stopwatch runExecStopwatch = Stopwatch.createStarted();
    final Stopwatch persistStopwatch = Stopwatch.createStarted();
    AtomicLong totalPersistedRows = new AtomicLong(numPersistedRows);
    final ListenableFuture<Object> future = persistExecutor.submit(new Callable<Object>() {

        @Override
        public Object call() throws IOException {
            try {
                for (Pair<FireHydrant, SegmentIdWithShardSpec> pair : indexesToPersist) {
                    metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
                }
                if (committer != null) {
                    log.debug("Committing metadata[%s] for sinks[%s].", commitMetadata, Joiner.on(", ").join(currentHydrants.entrySet().stream().map(entry -> StringUtils.format("%s:%d", entry.getKey(), entry.getValue())).collect(Collectors.toList())));
                    committer.run();
                    // Acquire the lock BEFORE entering the try block: if lock() itself fails, the finally block
                    // must not call unlock() on a lock this thread never acquired.
                    commitLock.lock();
                    try {
                        final Map<String, Integer> commitHydrants = new HashMap<>();
                        final Committed oldCommit = readCommit();
                        if (oldCommit != null) {
                            // merge current hydrants with existing hydrants
                            commitHydrants.putAll(oldCommit.getHydrants());
                        }
                        commitHydrants.putAll(currentHydrants);
                        writeCommit(new Committed(commitHydrants, commitMetadata));
                    } finally {
                        commitLock.unlock();
                    }
                }
                log.info("Flushed in-memory data with commit metadata [%s] for segments: %s", commitMetadata, indexesToPersist.stream().map(itp -> itp.rhs.asSegmentId().toString()).distinct().collect(Collectors.joining(", ")));
                log.info("Persisted stats: processed rows: [%d], persisted rows[%d], sinks: [%d], total fireHydrants (across sinks): [%d], persisted fireHydrants (across sinks): [%d]", rowIngestionMeters.getProcessed(), totalPersistedRows.get(), totalSinks, totalHydrantsCount.longValue(), totalHydrantsPersisted.longValue());
                // return null if committer is null
                return commitMetadata;
            } catch (IOException e) {
                metrics.incrementFailedPersists();
                throw e;
            } finally {
                metrics.incrementNumPersists();
                metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
                persistStopwatch.stop();
            }
        }
    });
    // Time spent inside submit() is reported as back pressure on the caller.
    final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
    metrics.incrementPersistBackPressureMillis(startDelay);
    if (startDelay > WARN_DELAY) {
        log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
    }
    runExecStopwatch.stop();
    resetNextFlush();
    // NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
    rowsCurrentlyInMemory.addAndGet(-numPersistedRows);
    bytesCurrentlyInMemory.addAndGet(-bytesPersisted);
    log.info("Persisted rows[%,d] and (estimated) bytes[%,d]", numPersistedRows, bytesPersisted);
    return future;
}
Also used : IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Stopwatch(com.google.common.base.Stopwatch) Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) Pair(org.apache.druid.java.util.common.Pair) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MutableLong(org.apache.commons.lang.mutable.MutableLong) AtomicLong(java.util.concurrent.atomic.AtomicLong) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)

Example 35 with FireHydrant

use of org.apache.druid.segment.realtime.FireHydrant in project druid by apache.

the class BatchAppenderator method getSinkForIdentifierPath.

/**
 * Rebuilds a non-writable {@link Sink} from the hydrant directories found under {@code identifierPath}.
 *
 * Only directory entries whose names parse as integers are considered. They are loaded in ascending numeric
 * order and must form a gap-free sequence starting at zero.
 *
 * @param identifier     segment identifier the sink belongs to
 * @param identifierPath directory holding the persisted hydrants for {@code identifier}
 *
 * @return a sink containing the loaded hydrants, with writing already finished
 *
 * @throws IOException declared by this method; presumably raised while loading an index - confirm
 * @throws ISE         if the directory cannot be listed or a hydrant number is missing from the sequence
 */
private Sink getSinkForIdentifierPath(SegmentIdWithShardSpec identifier, File identifierPath) throws IOException {
    // Keep numerically named entries only, which avoids reading the "merged" dir and other special files.
    final File[] hydrantDirs = identifierPath.listFiles((parent, name) -> Ints.tryParse(name) != null);
    if (hydrantDirs == null) {
        throw new ISE("Problem reading persisted sinks in path[%s]", identifierPath);
    }
    // Order by hydrant number rather than lexicographically.
    Arrays.sort(
        hydrantDirs,
        (left, right) -> Integer.compare(Integer.parseInt(left.getName()), Integer.parseInt(right.getName()))
    );
    final List<FireHydrant> loaded = new ArrayList<>();
    for (File dir : hydrantDirs) {
        final int number = Integer.parseInt(dir.getName());
        log.debug("Loading previously persisted partial segment at [%s]", dir);
        // The next hydrant number must equal the count loaded so far; anything else means a gap.
        final int expected = loaded.size();
        if (expected != number) {
            throw new ISE("Missing hydrant [%,d] in identifier [%s].", expected, identifier);
        }
        final QueryableIndexSegment segment = new QueryableIndexSegment(indexIO.loadIndex(dir), identifier.asSegmentId());
        loaded.add(new FireHydrant(segment, number));
    }
    final Sink restored = new Sink(
        identifier.getInterval(),
        schema,
        identifier.getShardSpec(),
        identifier.getVersion(),
        tuningConfig.getAppendableIndexSpec(),
        tuningConfig.getMaxRowsInMemory(),
        maxBytesTuningConfig,
        useMaxMemoryEstimates,
        null,
        loaded
    );
    // this sink is not writable
    restored.finishWriting();
    return restored;
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) Sink(org.apache.druid.segment.realtime.plumber.Sink) ArrayList(java.util.ArrayList) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File)

Aggregations

FireHydrant (org.apache.druid.segment.realtime.FireHydrant)38 IOException (java.io.IOException)26 ArrayList (java.util.ArrayList)26 IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException)22 File (java.io.File)20 ISE (org.apache.druid.java.util.common.ISE)20 QueryableIndex (org.apache.druid.segment.QueryableIndex)16 ExecutionException (java.util.concurrent.ExecutionException)12 Nullable (javax.annotation.Nullable)12 QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment)12 ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment)12 Sink (org.apache.druid.segment.realtime.plumber.Sink)12 Closer (org.apache.druid.java.util.common.io.Closer)10 BaseProgressIndicator (org.apache.druid.segment.BaseProgressIndicator)10 DataSegment (org.apache.druid.timeline.DataSegment)10 Interval (org.joda.time.Interval)10 Stopwatch (com.google.common.base.Stopwatch)8 Closeable (java.io.Closeable)8 List (java.util.List)8 Pair (org.apache.druid.java.util.common.Pair)8