Search in sources:

Example 21 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class StreamAppenderator method push.

/**
 * Stops writing to the sinks for the given identifiers, persists everything via {@code persistAll}, and then
 * merges and pushes each of those sinks on {@code pushExecutor}.
 *
 * @param identifiers   identifiers of the sinks to push; each must have a sink registered in {@code sinks}
 * @param committer     committer handed to {@code persistAll}; may be null
 * @param useUniquePath forwarded unchanged to {@code mergeAndPush} for every sink
 *
 * @return future holding the pushed segments together with the commit metadata produced by the persist
 *
 * @throws ISE if any identifier has no corresponding sink
 */
@Override
public ListenableFuture<SegmentsAndCommitMetadata> push(final Collection<SegmentIdWithShardSpec> identifiers, @Nullable final Committer committer, final boolean useUniquePath) {
    final Map<SegmentIdWithShardSpec, Sink> sinksToPush = new HashMap<>();
    long hydrantTally = 0;
    for (final SegmentIdWithShardSpec requested : identifiers) {
        final Sink candidate = sinks.get(requested);
        if (candidate == null) {
            throw new ISE("No sink for identifier: %s", requested);
        }
        sinksToPush.put(requested, candidate);
        final boolean justFinished = candidate.finishWriting();
        if (justFinished) {
            // The sink's rows are subtracted from the running total once finishWriting() reports true.
            totalRows.addAndGet(-candidate.getNumRows());
        }
        // Hydrant count is gathered purely for the stats log line below.
        hydrantTally += Iterables.size(candidate);
    }
    // Lambdas can only capture effectively-final locals, so freeze the tally here.
    final long hydrantsAcrossSinks = hydrantTally;

    // Runs on pushExecutor after persistAll completes; receives the commit metadata from the persist.
    final Function<Object, SegmentsAndCommitMetadata> mergeAndPushAll = commitMetadata -> {
        final List<DataSegment> pushed = new ArrayList<>();
        log.info("Preparing to push (stats): processed rows: [%d], sinks: [%d], fireHydrants (across sinks): [%d]", rowIngestionMeters.getProcessed(), sinksToPush.size(), hydrantsAcrossSinks);
        final String joinedIds = sinksToPush.keySet().stream().map(SegmentIdWithShardSpec::toString).collect(Collectors.joining(", "));
        log.debug("Building and pushing segments: %s", joinedIds);
        for (Map.Entry<SegmentIdWithShardSpec, Sink> sinkEntry : sinksToPush.entrySet()) {
            final SegmentIdWithShardSpec segmentId = sinkEntry.getKey();
            if (droppingSinks.contains(segmentId)) {
                log.warn("Skipping push of currently-dropping sink[%s]", segmentId);
                continue;
            }
            final DataSegment outcome = mergeAndPush(segmentId, sinkEntry.getValue(), useUniquePath);
            if (outcome == null) {
                log.warn("mergeAndPush[%s] returned null, skipping.", segmentId);
            } else {
                pushed.add(outcome);
            }
        }
        log.info("Push complete...");
        return new SegmentsAndCommitMetadata(pushed, commitMetadata);
    };
    return Futures.transform(persistAll(committer), mergeAndPushAll, pushExecutor);
}
Also used : DataSegmentAnnouncer(org.apache.druid.server.coordination.DataSegmentAnnouncer) Arrays(java.util.Arrays) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Pair(org.apache.druid.java.util.common.Pair) FileLock(java.nio.channels.FileLock) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) IAE(org.apache.druid.java.util.common.IAE) FileUtils(org.apache.druid.java.util.common.FileUtils) DateTimes(org.apache.druid.java.util.common.DateTimes) Function(com.google.common.base.Function) Execs(org.apache.druid.java.util.common.concurrent.Execs) Closer(org.apache.druid.java.util.common.io.Closer) Collection(java.util.Collection) QueryableIndex(org.apache.druid.segment.QueryableIndex) StandardOpenOption(java.nio.file.StandardOpenOption) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) InputRow(org.apache.druid.data.input.InputRow) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) MutableLong(org.apache.commons.lang.mutable.MutableLong) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) Joiner(com.google.common.base.Joiner) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Stopwatch(com.google.common.base.Stopwatch) Supplier(com.google.common.base.Supplier) 
AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Sink(org.apache.druid.segment.realtime.plumber.Sink) RetryUtils(org.apache.druid.java.util.common.RetryUtils) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) Nullable(javax.annotation.Nullable) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) ReentrantLock(java.util.concurrent.locks.ReentrantLock) RE(org.apache.druid.java.util.common.RE) IndexMerger(org.apache.druid.segment.IndexMerger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) IOException(java.io.IOException) Ints(com.google.common.primitives.Ints) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) FutureCallback(com.google.common.util.concurrent.FutureCallback) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) Futures(com.google.common.util.concurrent.Futures) Lock(java.util.concurrent.locks.Lock) Closeable(java.io.Closeable) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) Cache(org.apache.druid.client.cache.Cache) IndexIO(org.apache.druid.segment.IndexIO) 
IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) DataSchema(org.apache.druid.segment.indexing.DataSchema) FileChannel(java.nio.channels.FileChannel) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) DataSegment(org.apache.druid.timeline.DataSegment) AtomicLong(java.util.concurrent.atomic.AtomicLong) Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList)

Example 22 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class StreamAppenderator method close.

/**
 * Closes this appenderator: abandons every sink, waits for those abandon operations, shuts down the executors
 * and waits for them to terminate, then unlocks the base persist directory.
 *
 * Only the first call does any work; later calls return immediately because the {@code closed} flag is
 * already set.
 *
 * @throws ISE if the thread is interrupted while waiting for the executors to terminate; the interrupt flag is
 *             restored and the {@link InterruptedException} is attached as the cause
 */
@Override
public void close() {
    if (!closed.compareAndSet(false, true)) {
        log.debug("Appenderator already closed, skipping close() call.");
        return;
    }
    log.debug("Shutting down...");
    final List<ListenableFuture<?>> futures = new ArrayList<>();
    for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        futures.add(abandonSegment(entry.getKey(), entry.getValue(), false));
    }
    try {
        Futures.allAsList(futures).get();
    } catch (InterruptedException e) {
        // Restore the interrupt flag but keep going: the executors still need to be shut down.
        Thread.currentThread().interrupt();
        log.warn(e, "Interrupted during close()");
    } catch (ExecutionException e) {
        log.warn(e, "Unable to abandon existing segments during close()");
    }
    try {
        shutdownExecutors();
        Preconditions.checkState(persistExecutor == null || persistExecutor.awaitTermination(365, TimeUnit.DAYS), "persistExecutor not terminated");
        Preconditions.checkState(pushExecutor == null || pushExecutor.awaitTermination(365, TimeUnit.DAYS), "pushExecutor not terminated");
        Preconditions.checkState(intermediateTempExecutor == null || intermediateTempExecutor.awaitTermination(365, TimeUnit.DAYS), "intermediateTempExecutor not terminated");
        persistExecutor = null;
        pushExecutor = null;
        intermediateTempExecutor = null;
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        // Attach the cause so the stack trace shows where the wait was interrupted.
        throw new ISE(e, "Failed to shutdown executors during close()");
    }
    // Only unlock if executors actually shut down.
    unlockBasePersistDirectory();
}
Also used : Sink(org.apache.druid.segment.realtime.plumber.Sink) ArrayList(java.util.ArrayList) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ISE(org.apache.druid.java.util.common.ISE) ExecutionException(java.util.concurrent.ExecutionException) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)

Example 23 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class StreamAppenderator method closeNow.

/**
 * Unannounce the segments and wait for outstanding persists to finish.
 * Do not unlock base persist dir as we are not waiting for push executor to shut down
 * relying on current JVM to shutdown to not cause any locking problem if the task is restored.
 * In case when task is restored and current task is still active because of push executor (which it shouldn't be
 * since push executor starts daemon threads) then the locking should fail and new task should fail to start.
 * This also means that this method should only be called when task is shutting down.
 *
 * Only the first call does any work; later calls return immediately because the {@code closed} flag is
 * already set.
 *
 * @throws ISE if the thread is interrupted while waiting for the persist or intermediate-temp executor to
 *             terminate; the interrupt flag is restored and the {@link InterruptedException} is attached as the
 *             cause
 */
@Override
public void closeNow() {
    if (!closed.compareAndSet(false, true)) {
        log.debug("Appenderator already closed, skipping closeNow() call.");
        return;
    }
    log.debug("Shutting down immediately...");
    for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        try {
            segmentAnnouncer.unannounceSegment(entry.getValue().getSegment());
        } catch (Exception e) {
            // Best effort: a failure to unannounce one segment is alerted on and must not stop the shutdown.
            log.makeAlert(e, "Failed to unannounce segment[%s]", schema.getDataSource()).addData("identifier", entry.getKey().toString()).emit();
        }
    }
    try {
        shutdownExecutors();
        // We don't wait for pushExecutor to be terminated. See Javadoc for more details.
        Preconditions.checkState(persistExecutor == null || persistExecutor.awaitTermination(365, TimeUnit.DAYS), "persistExecutor not terminated");
        Preconditions.checkState(intermediateTempExecutor == null || intermediateTempExecutor.awaitTermination(365, TimeUnit.DAYS), "intermediateTempExecutor not terminated");
        persistExecutor = null;
        intermediateTempExecutor = null;
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        // Attach the cause and name the right method so the failure can be told apart from one in close().
        throw new ISE(e, "Failed to shutdown executors during closeNow()");
    }
}
Also used : Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 24 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class StreamAppenderator method bootstrapSinksFromDisk.

/**
 * Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
 *
 * Each sub-directory of the base persist directory that contains an identifier file is treated as a sink
 * directory. Sink directories with no committed hydrants are deleted, as are hydrant directories whose number is
 * at or beyond the committed hydrant count. The remaining hydrants are loaded in numeric order. Finally,
 * {@code totalRows} is set to the sum of the row counts of the loaded sinks.
 *
 * @return persisted commit metadata, or null if the base persist directory does not exist or cannot be listed
 *
 * @throws IllegalStateException if sinks were already populated
 * @throws ISE                   if the commit file cannot be read, a sink directory cannot be listed, a committed
 *                               hydrant is missing, or a committed sink was not loaded
 */
private Object bootstrapSinksFromDisk() {
    Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
    final File baseDir = tuningConfig.getBasePersistDirectory();
    if (!baseDir.exists()) {
        return null;
    }
    final File[] files = baseDir.listFiles();
    if (files == null) {
        return null;
    }
    final Committed committed;
    File commitFile = null;
    // Acquire the lock before entering the try block so that finally never unlocks a lock that was not acquired.
    commitLock.lock();
    try {
        commitFile = computeCommitFile();
        if (commitFile.exists()) {
            committed = objectMapper.readValue(commitFile, Committed.class);
        } else {
            committed = Committed.nil();
        }
    } catch (Exception e) {
        throw new ISE(e, "Failed to read commitFile: %s", commitFile);
    } finally {
        commitLock.unlock();
    }
    int rowsSoFar = 0;
    if (committed.equals(Committed.nil())) {
        log.debug("No previously committed metadata.");
    } else {
        log.info("Loading partially-persisted segments[%s] from[%s] with commit metadata: %s", String.join(", ", committed.getHydrants().keySet()), baseDir, committed.getMetadata());
    }
    for (File sinkDir : files) {
        final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
        if (!identifierFile.isFile()) {
            // No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
            continue;
        }
        try {
            // Read the same file whose existence was just verified, rather than rebuilding the path by hand.
            final SegmentIdWithShardSpec identifier = objectMapper.readValue(identifierFile, SegmentIdWithShardSpec.class);
            final int committedHydrants = committed.getCommittedHydrants(identifier.toString());
            if (committedHydrants <= 0) {
                log.info("Removing uncommitted segment at [%s].", sinkDir);
                FileUtils.deleteDirectory(sinkDir);
                continue;
            }
            // To avoid reading and listing of "merged" dir and other special files
            final File[] sinkFiles = sinkDir.listFiles((dir, fileName) -> Ints.tryParse(fileName) != null);
            if (sinkFiles == null) {
                // File.listFiles returns null on an I/O error or when the path is not a directory; fail with a
                // descriptive error instead of a NullPointerException from Arrays.sort.
                throw new ISE("Failed to list contents of sinkDir [%s].", sinkDir);
            }
            // Hydrant directories are named by their number; sort numerically so that they load in order.
            Arrays.sort(sinkFiles, (o1, o2) -> Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())));
            List<FireHydrant> hydrants = new ArrayList<>();
            for (File hydrantDir : sinkFiles) {
                final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
                if (hydrantNumber >= committedHydrants) {
                    log.info("Removing uncommitted partial segment at [%s]", hydrantDir);
                    FileUtils.deleteDirectory(hydrantDir);
                } else {
                    log.debug("Loading previously persisted partial segment at [%s]", hydrantDir);
                    // Hydrants must be contiguous from zero: the next one loaded has to match the current count.
                    if (hydrantNumber != hydrants.size()) {
                        throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
                    }
                    hydrants.add(new FireHydrant(new QueryableIndexSegment(indexIO.loadIndex(hydrantDir), identifier.asSegmentId()), hydrantNumber));
                }
            }
            // Make sure we loaded enough hydrants.
            if (committedHydrants != hydrants.size()) {
                throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
            }
            Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, useMaxMemoryEstimates, null, hydrants);
            rowsSoFar += currSink.getNumRows();
            sinks.put(identifier, currSink);
            sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
            segmentAnnouncer.announceSegment(currSink.getSegment());
        } catch (IOException e) {
            // Alert and move on; a committed sink that failed to load is caught by the check after the loop.
            log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
        }
    }
    // Make sure we loaded all committed sinks.
    final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), SegmentIdWithShardSpec::toString));
    final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
    if (!missingSinks.isEmpty()) {
        throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
    }
    totalRows.set(rowsSoFar);
    return committed.getMetadata();
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) ArrayList(java.util.ArrayList) IOException(java.io.IOException) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File)

Example 25 with ISE

use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

the class StreamAppenderator method mergeAndPush.

/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
 * be run in the single-threaded pushExecutor.
 *
 * A descriptor file is written after a successful push. On a later call, if that file exists and
 * {@code useUniquePath} is false, the segment is read back from the descriptor instead of being pushed again.
 *
 * @param identifier    sink identifier
 * @param sink          sink to push
 * @param useUniquePath true if the segment should be written to a path with a unique identifier
 *
 * @return segment descriptor, or null if the sink is no longer valid
 *
 * @throws ISE              if the sink is still writable or one of its hydrants has not been swapped
 * @throws RuntimeException wrapping any exception raised while merging, pushing or writing the descriptor
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink, final boolean useUniquePath) {
    // Identity comparison is intentional: the exact sink instance must still be the registered one.
    // noinspection ObjectEquality
    if (sinks.get(identifier) != sink) {
        log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
        return null;
    }
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    // Writability does not depend on any hydrant, so check it once up front; this also covers a sink that
    // iterates over no hydrants at all.
    if (sink.isWritable()) {
        throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
    }
    for (FireHydrant hydrant : sink) {
        synchronized (hydrant) {
            if (!hydrant.hasSwapped()) {
                throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
            }
        }
    }
    try {
        if (descriptorFile.exists()) {
            if (useUniquePath) {
                // Don't reuse the descriptor, because the caller asked for a unique path. Leave the old one as-is, since
                // it might serve some unknown purpose.
                log.debug("Segment[%s] already pushed, but we want a unique path, so will push again with a new path.", identifier);
            } else {
                log.info("Segment[%s] already pushed, skipping.", identifier);
                return objectMapper.readValue(descriptorFile, DataSegment.class);
            }
        }
        // Start from a clean merge directory.
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        final File mergedFile;
        final long mergeFinishTime;
        final long startTime = System.nanoTime();
        List<QueryableIndex> indexes = new ArrayList<>();
        Closer closer = Closer.create();
        try {
            for (FireHydrant fireHydrant : sink) {
                Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
                indexes.add(queryableIndex);
                // The closeable paired with each segment is closed in the finally block, once the merge is done.
                closer.register(segmentAndCloseable.rhs);
            }
            mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), schema.getDimensionsSpec(), mergedTarget, tuningConfig.getIndexSpec(), tuningConfig.getIndexSpecForIntermediatePersists(), new BaseProgressIndicator(), tuningConfig.getSegmentWriteOutMediumFactory(), tuningConfig.getMaxColumnsToMerge());
            mergeFinishTime = System.nanoTime();
            log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
        final DataSegment segmentToPush = sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec()));
        // Retry pushing segments because uploading to deep storage might fail especially for cloud storage types
        final DataSegment segment = RetryUtils.retry(
        () -> dataSegmentPusher.push(mergedFile, segmentToPush, useUniquePath), exception -> exception instanceof Exception, 5);
        final long pushFinishTime = System.nanoTime();
        // Writing the descriptor marks the push as complete for later calls.
        objectMapper.writeValue(descriptorFile, segment);
        log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw new RuntimeException(e);
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Nullable(javax.annotation.Nullable)

Aggregations

ISE (org.apache.druid.java.util.common.ISE)354 IOException (java.io.IOException)95 ArrayList (java.util.ArrayList)90 Map (java.util.Map)68 List (java.util.List)60 File (java.io.File)48 Interval (org.joda.time.Interval)48 DataSegment (org.apache.druid.timeline.DataSegment)44 HashMap (java.util.HashMap)43 Nullable (javax.annotation.Nullable)43 URL (java.net.URL)36 StatusResponseHolder (org.apache.druid.java.util.http.client.response.StatusResponseHolder)33 Request (org.apache.druid.java.util.http.client.Request)30 ExecutionException (java.util.concurrent.ExecutionException)29 ImmutableMap (com.google.common.collect.ImmutableMap)28 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)28 VisibleForTesting (com.google.common.annotations.VisibleForTesting)27 Collectors (java.util.stream.Collectors)27 IAE (org.apache.druid.java.util.common.IAE)27 ImmutableList (com.google.common.collect.ImmutableList)26