Example 6 with BaseProgressIndicator

Use of org.apache.druid.segment.BaseProgressIndicator in project druid by druid-io.

From the class BatchAppenderator, the method mergeAndPush:

/**
 * Merge a segment and push it to deep storage. Should only be used on segments that have been fully persisted.
 *
 * @param identifier    sink identifier
 * @param sink          sink to push
 * @return segment descriptor, or null if the sink is no longer valid
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink) {
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    if (sink.isWritable()) {
        throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
    }
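    // Verify every hydrant has already been persisted (swapped to an on-disk segment),
    // counting them as we go for the metadata cross-check below.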
    int numHydrants = 0;
    for (FireHydrant hydrant : sink) {
        if (!hydrant.hasSwapped()) {
            throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
        }
        numHydrants++;
    }
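    // Cross-check the observed hydrant count against the sink's restored metadata, when available.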
    SinkMetadata sm = sinksMetadata.get(identifier);
    if (sm == null) {
        log.warn("Sink metadata not found just before merge for identifier [%s]", identifier);
    } else if (numHydrants != sm.getNumHydrants()) {
        throw new ISE("Number of restored hydrants[%d] for identifier[%s] does not match expected value[%d]", numHydrants, identifier, sm.getNumHydrants());
    }
    try {
        if (descriptorFile.exists()) {
            // Already pushed.
            log.info("Segment[%s] already pushed, skipping.", identifier);
            return objectMapper.readValue(descriptorFile, DataSegment.class);
        }
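        // Clear any partial merge output left over from a previous attempt.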
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        final File mergedFile;
        final long mergeFinishTime;
        final long startTime = System.nanoTime();
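        // Gather the queryable index behind each hydrant, holding a reference (released via the
        // closer) so the underlying segment cannot be closed mid-merge.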
        List<QueryableIndex> indexes = new ArrayList<>();
        Closer closer = Closer.create();
        try {
            for (FireHydrant fireHydrant : sink) {
                Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
                indexes.add(queryableIndex);
                closer.register(segmentAndCloseable.rhs);
            }
            mergedFile = indexMerger.mergeQueryableIndex(
                indexes,
                schema.getGranularitySpec().isRollup(),
                schema.getAggregators(),
                schema.getDimensionsSpec(),
                mergedTarget,
                tuningConfig.getIndexSpec(),
                tuningConfig.getIndexSpecForIntermediatePersists(),
                new BaseProgressIndicator(),
                tuningConfig.getSegmentWriteOutMediumFactory(),
                tuningConfig.getMaxColumnsToMerge()
            );
            mergeFinishTime = System.nanoTime();
            log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
        // Retry pushing segments because uploading to deep storage might fail, especially for cloud storage types.
        final DataSegment segment = RetryUtils.retry(
            // This appenderator is used only for the local indexing task, so unique paths are not required.
            () -> dataSegmentPusher.push(
                mergedFile,
                sink.getSegment().withDimensions(
                    IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())
                ),
                false
            ),
            exception -> exception instanceof Exception,
            5
        );
        // Drop the queryable indexes behind the hydrants; they are not needed anymore, and their
        // presence can generate OOMs during merge if enough of them are held back...
        for (FireHydrant fireHydrant : sink) {
            fireHydrant.swapSegment(null);
        }
        // cleanup, sink no longer needed
        removeDirectory(computePersistDir(identifier));
        final long pushFinishTime = System.nanoTime();
        log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw new RuntimeException(e);
    }
}
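
The BaseProgressIndicator passed to mergeQueryableIndex above is a no-op implementation of Druid's ProgressIndicator, so this batch path receives merge callbacks but does nothing with them. As a minimal sketch of what those callbacks can carry, the hypothetical TimingProgressIndicator below (assuming the startSection/stopSection callbacks of the ProgressIndicator interface, plus Guava's Stopwatch) logs how long each merge section takes:

import com.google.common.base.Stopwatch;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.BaseProgressIndicator;

// Hypothetical sketch: times each named section of a merge and logs it on completion.
public class TimingProgressIndicator extends BaseProgressIndicator {

    private static final Logger log = new Logger(TimingProgressIndicator.class);

    // One stopwatch per in-flight section, keyed by section name.
    private final Map<String, Stopwatch> sections = new ConcurrentHashMap<>();

    @Override
    public void startSection(String section) {
        sections.put(section, Stopwatch.createStarted());
    }

    @Override
    public void stopSection(String section) {
        final Stopwatch watch = sections.remove(section);
        if (watch != null) {
            log.info("Section[%s] finished in %,dms.", section, watch.elapsed(TimeUnit.MILLISECONDS));
        }
    }
}

Passing new TimingProgressIndicator() in place of new BaseProgressIndicator() in the mergeQueryableIndex call would surface per-section merge timings without otherwise changing merge behavior; Druid's own LoggingProgressIndicator takes a similar approach.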
Also used: Closer (org.apache.druid.java.util.common.io.Closer), ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment), Closeable (java.io.Closeable), ArrayList (java.util.ArrayList), DataSegment (org.apache.druid.timeline.DataSegment), IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException), IOException (java.io.IOException), QueryableIndex (org.apache.druid.segment.QueryableIndex), ISE (org.apache.druid.java.util.common.ISE), FireHydrant (org.apache.druid.segment.realtime.FireHydrant), File (java.io.File), BaseProgressIndicator (org.apache.druid.segment.BaseProgressIndicator), Nullable (javax.annotation.Nullable)

Aggregations

File (java.io.File): 6
ArrayList (java.util.ArrayList): 6
BaseProgressIndicator (org.apache.druid.segment.BaseProgressIndicator): 6
QueryableIndex (org.apache.druid.segment.QueryableIndex): 6
IOException (java.io.IOException): 5
Closer (org.apache.druid.java.util.common.io.Closer): 5
ISE (org.apache.druid.java.util.common.ISE): 4
IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException): 4
FireHydrant (org.apache.druid.segment.realtime.FireHydrant): 4
DataSegment (org.apache.druid.timeline.DataSegment): 4
Closeable (java.io.Closeable): 3
Nullable (javax.annotation.Nullable): 3
ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment): 3
ExecutionException (java.util.concurrent.ExecutionException): 2
Stopwatch (com.google.common.base.Stopwatch): 1
List (java.util.List): 1
ThreadRenamingRunnable (org.apache.druid.common.guava.ThreadRenamingRunnable): 1
InputRow (org.apache.druid.data.input.InputRow): 1
Pair (org.apache.druid.java.util.common.Pair): 1
SegmentDescriptor (org.apache.druid.query.SegmentDescriptor): 1