Example 71 with ISE

Use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class StreamAppenderator, method close().

@Override
public void close() {
    if (!closed.compareAndSet(false, true)) {
        log.debug("Appenderator already closed, skipping close() call.");
        return;
    }
    log.debug("Shutting down...");
    final List<ListenableFuture<?>> futures = new ArrayList<>();
    for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        futures.add(abandonSegment(entry.getKey(), entry.getValue(), false));
    }
    try {
        Futures.allAsList(futures).get();
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        log.warn(e, "Interrupted during close()");
    } catch (ExecutionException e) {
        log.warn(e, "Unable to abandon existing segments during close()");
    }
    try {
        shutdownExecutors();
        Preconditions.checkState(persistExecutor == null || persistExecutor.awaitTermination(365, TimeUnit.DAYS), "persistExecutor not terminated");
        Preconditions.checkState(pushExecutor == null || pushExecutor.awaitTermination(365, TimeUnit.DAYS), "pushExecutor not terminated");
        Preconditions.checkState(intermediateTempExecutor == null || intermediateTempExecutor.awaitTermination(365, TimeUnit.DAYS), "intermediateTempExecutor not terminated");
        persistExecutor = null;
        pushExecutor = null;
        intermediateTempExecutor = null;
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new ISE("Failed to shutdown executors during close()");
    }
    // Only unlock if executors actually shut down.
    unlockBasePersistDirectory();
}
Also used: Sink (org.apache.druid.segment.realtime.plumber.Sink), ArrayList (java.util.ArrayList), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), ISE (org.apache.druid.java.util.common.ISE), ExecutionException (java.util.concurrent.ExecutionException), Map (java.util.Map), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashMap (java.util.HashMap), ConcurrentMap (java.util.concurrent.ConcurrentMap)
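
The common thread in these examples is Druid's ISE helper. As a quick orientation, here is a minimal, self-contained sketch (not taken from the Druid codebase, assuming druid-core is on the classpath): ISE extends IllegalStateException, formats its message from a format string plus arguments, and, as Example 73 shows with new ISE(e, "Failed to read commitFile: %s", commitFile), can also carry a Throwable cause. The message values below are illustrative.

import org.apache.druid.java.util.common.ISE;

public class IseBasicsSketch {
    public static void main(String[] args) {
        // ISE formats its message from a format string plus arguments.
        try {
            throw new ISE("Cannot handle subquery structure for dataSource: %s", "wikipedia");
        } catch (IllegalStateException e) {
            // Prints: Cannot handle subquery structure for dataSource: wikipedia
            System.out.println(e.getMessage());
        }

        // A cause can be attached, as in Example 73, so the original stack trace is preserved.
        try {
            throw new ISE(new java.io.IOException("disk full"), "Failed to read commitFile: %s", "/tmp/commit.json");
        } catch (IllegalStateException e) {
            System.out.println(e.getMessage() + " (cause: " + e.getCause() + ")");
        }
    }
}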

Example 72 with ISE

Use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class StreamAppenderator, method closeNow().

/**
 * Unannounce the segments and wait for outstanding persists to finish.
 * The base persist directory is not unlocked, because we do not wait for the push executor to shut down; we rely
 * on the current JVM shutting down, so the held lock cannot cause problems if the task is restored. If a restored
 * task starts while the current task is still alive because of the push executor (which should not happen, since
 * the push executor starts daemon threads), acquiring the lock should fail and the new task should fail to start.
 * Consequently, this method should only be called when the task is shutting down.
 */
@Override
public void closeNow() {
    if (!closed.compareAndSet(false, true)) {
        log.debug("Appenderator already closed, skipping closeNow() call.");
        return;
    }
    log.debug("Shutting down immediately...");
    for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        try {
            segmentAnnouncer.unannounceSegment(entry.getValue().getSegment());
        } catch (Exception e) {
            log.makeAlert(e, "Failed to unannounce segment[%s]", schema.getDataSource()).addData("identifier", entry.getKey().toString()).emit();
        }
    }
    try {
        shutdownExecutors();
        // We don't wait for pushExecutor to be terminated. See Javadoc for more details.
        Preconditions.checkState(persistExecutor == null || persistExecutor.awaitTermination(365, TimeUnit.DAYS), "persistExecutor not terminated");
        Preconditions.checkState(intermediateTempExecutor == null || intermediateTempExecutor.awaitTermination(365, TimeUnit.DAYS), "intermediateTempExecutor not terminated");
        persistExecutor = null;
        intermediateTempExecutor = null;
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new ISE("Failed to shutdown executors during close()");
    }
}
Also used: Sink (org.apache.druid.segment.realtime.plumber.Sink), ISE (org.apache.druid.java.util.common.ISE), Map (java.util.Map), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashMap (java.util.HashMap), ConcurrentMap (java.util.concurrent.ConcurrentMap), IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException)
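
Both close() and closeNow() lean on the same two patterns: an AtomicBoolean guard so that closing twice is a no-op, and a shutdown-then-awaitTermination check that surfaces failure as an ISE. Below is a minimal sketch of that shape; it is not Druid's code, manages a single illustrative executor rather than the appenderator's several, and reuses the generous 365-day timeout from the snippets above.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.druid.java.util.common.ISE;

public class CloseOnceSketch {
    private final AtomicBoolean closed = new AtomicBoolean(false);
    private final ExecutorService persistExecutor = Executors.newSingleThreadExecutor();

    public void close() {
        // compareAndSet makes the close idempotent: only the first caller proceeds.
        if (!closed.compareAndSet(false, true)) {
            return;
        }
        persistExecutor.shutdown();
        try {
            // Wait for in-flight work to drain; treat non-termination as an illegal state.
            if (!persistExecutor.awaitTermination(365, TimeUnit.DAYS)) {
                throw new ISE("persistExecutor not terminated");
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new ISE(e, "Failed to shutdown executors during close()");
        }
    }

    public static void main(String[] args) {
        CloseOnceSketch sketch = new CloseOnceSketch();
        sketch.close();
        // The second call returns immediately.
        sketch.close();
    }
}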

Example 73 with ISE

Use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class StreamAppenderator, method bootstrapSinksFromDisk().

/**
 * Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
 *
 * @return persisted commit metadata
 */
private Object bootstrapSinksFromDisk() {
    Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
    final File baseDir = tuningConfig.getBasePersistDirectory();
    if (!baseDir.exists()) {
        return null;
    }
    final File[] files = baseDir.listFiles();
    if (files == null) {
        return null;
    }
    final Committed committed;
    File commitFile = null;
    try {
        commitLock.lock();
        commitFile = computeCommitFile();
        if (commitFile.exists()) {
            committed = objectMapper.readValue(commitFile, Committed.class);
        } else {
            committed = Committed.nil();
        }
    } catch (Exception e) {
        throw new ISE(e, "Failed to read commitFile: %s", commitFile);
    } finally {
        commitLock.unlock();
    }
    int rowsSoFar = 0;
    if (committed.equals(Committed.nil())) {
        log.debug("No previously committed metadata.");
    } else {
        log.info("Loading partially-persisted segments[%s] from[%s] with commit metadata: %s", String.join(", ", committed.getHydrants().keySet()), baseDir, committed.getMetadata());
    }
    for (File sinkDir : files) {
        final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
        if (!identifierFile.isFile()) {
            // No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
            continue;
        }
        try {
            final SegmentIdWithShardSpec identifier = objectMapper.readValue(new File(sinkDir, "identifier.json"), SegmentIdWithShardSpec.class);
            final int committedHydrants = committed.getCommittedHydrants(identifier.toString());
            if (committedHydrants <= 0) {
                log.info("Removing uncommitted segment at [%s].", sinkDir);
                FileUtils.deleteDirectory(sinkDir);
                continue;
            }
            // Skip the "merged" dir and other special files; only numerically-named hydrant dirs are loaded.
            final File[] sinkFiles = sinkDir.listFiles((dir, fileName) -> Ints.tryParse(fileName) != null);
            Arrays.sort(sinkFiles, (o1, o2) -> Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())));
            List<FireHydrant> hydrants = new ArrayList<>();
            for (File hydrantDir : sinkFiles) {
                final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
                if (hydrantNumber >= committedHydrants) {
                    log.info("Removing uncommitted partial segment at [%s]", hydrantDir);
                    FileUtils.deleteDirectory(hydrantDir);
                } else {
                    log.debug("Loading previously persisted partial segment at [%s]", hydrantDir);
                    if (hydrantNumber != hydrants.size()) {
                        throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
                    }
                    hydrants.add(new FireHydrant(new QueryableIndexSegment(indexIO.loadIndex(hydrantDir), identifier.asSegmentId()), hydrantNumber));
                }
            }
            // Make sure we loaded enough hydrants.
            if (committedHydrants != hydrants.size()) {
                throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
            }
            Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, useMaxMemoryEstimates, null, hydrants);
            rowsSoFar += currSink.getNumRows();
            sinks.put(identifier, currSink);
            sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
            segmentAnnouncer.announceSegment(currSink.getSegment());
        } catch (IOException e) {
            log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
        }
    }
    // Make sure we loaded all committed sinks.
    final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), SegmentIdWithShardSpec::toString));
    final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
    if (!missingSinks.isEmpty()) {
        throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
    }
    totalRows.set(rowsSoFar);
    return committed.getMetadata();
}
Also used: QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment), ArrayList (java.util.ArrayList), IOException (java.io.IOException), IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException), ExecutionException (java.util.concurrent.ExecutionException), Sink (org.apache.druid.segment.realtime.plumber.Sink), ISE (org.apache.druid.java.util.common.ISE), FireHydrant (org.apache.druid.segment.realtime.FireHydrant), File (java.io.File)
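
The directory scan in bootstrapSinksFromDisk() relies on hydrant directories being named with their hydrant numbers. Here is a minimal sketch of that filter-and-sort step, assuming Guava is on the classpath; it is not Druid's code, and the sink path in main() is purely illustrative.

import java.io.File;
import java.util.Arrays;
import java.util.Comparator;

import com.google.common.primitives.Ints;

public class HydrantDirScanSketch {
    public static File[] listHydrantDirs(File sinkDir) {
        // Keep only entries whose names parse as integers, skipping "merged" and other special files.
        final File[] hydrantDirs = sinkDir.listFiles((dir, fileName) -> Ints.tryParse(fileName) != null);
        if (hydrantDirs == null) {
            return new File[0];
        }
        // Sort numerically so hydrant 10 follows hydrant 9 rather than hydrant 1.
        Arrays.sort(hydrantDirs, Comparator.comparingInt((File f) -> Integer.parseInt(f.getName())));
        return hydrantDirs;
    }

    public static void main(String[] args) {
        for (File hydrantDir : listHydrantDirs(new File("/tmp/example-sink"))) {
            System.out.println(hydrantDir.getName());
        }
    }
}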

Example 74 with ISE

Use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class StreamAppenderator, method mergeAndPush().

/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
 * be run in the single-threaded pushExecutor.
 *
 * @param identifier    sink identifier
 * @param sink          sink to push
 * @param useUniquePath true if the segment should be written to a path with a unique identifier
 *
 * @return segment descriptor, or null if the sink is no longer valid
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink, final boolean useUniquePath) {
    // noinspection ObjectEquality
    if (sinks.get(identifier) != sink) {
        log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
        return null;
    }
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    for (FireHydrant hydrant : sink) {
        if (sink.isWritable()) {
            throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
        }
        synchronized (hydrant) {
            if (!hydrant.hasSwapped()) {
                throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
            }
        }
    }
    try {
        if (descriptorFile.exists()) {
            if (useUniquePath) {
                // Don't reuse the descriptor, because the caller asked for a unique path. Leave the old one as-is, since
                // it might serve some unknown purpose.
                log.debug("Segment[%s] already pushed, but we want a unique path, so will push again with a new path.", identifier);
            } else {
                log.info("Segment[%s] already pushed, skipping.", identifier);
                return objectMapper.readValue(descriptorFile, DataSegment.class);
            }
        }
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        final File mergedFile;
        final long mergeFinishTime;
        final long startTime = System.nanoTime();
        List<QueryableIndex> indexes = new ArrayList<>();
        Closer closer = Closer.create();
        try {
            for (FireHydrant fireHydrant : sink) {
                Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
                indexes.add(queryableIndex);
                closer.register(segmentAndCloseable.rhs);
            }
            mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), schema.getDimensionsSpec(), mergedTarget, tuningConfig.getIndexSpec(), tuningConfig.getIndexSpecForIntermediatePersists(), new BaseProgressIndicator(), tuningConfig.getSegmentWriteOutMediumFactory(), tuningConfig.getMaxColumnsToMerge());
            mergeFinishTime = System.nanoTime();
            log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
        final DataSegment segmentToPush = sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec()));
        // Retry pushing segments because uploading to deep storage might fail, especially for cloud storage types.
        final DataSegment segment = RetryUtils.retry(
            () -> dataSegmentPusher.push(mergedFile, segmentToPush, useUniquePath),
            exception -> exception instanceof Exception,
            5
        );
        final long pushFinishTime = System.nanoTime();
        objectMapper.writeValue(descriptorFile, segment);
        log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw new RuntimeException(e);
    }
}
Also used: Closer (org.apache.druid.java.util.common.io.Closer), ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment), Closeable (java.io.Closeable), ArrayList (java.util.ArrayList), DataSegment (org.apache.druid.timeline.DataSegment), IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException), QueryableIndex (org.apache.druid.segment.QueryableIndex), ISE (org.apache.druid.java.util.common.ISE), FireHydrant (org.apache.druid.segment.realtime.FireHydrant), File (java.io.File), BaseProgressIndicator (org.apache.druid.segment.BaseProgressIndicator), Nullable (javax.annotation.Nullable)
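
The push itself is wrapped in RetryUtils.retry with the same (task, shouldRetry predicate, max attempts) arguments shown above, since transient deep storage failures are expected. Below is a minimal sketch of that wrapper, assuming druid-core's RetryUtils behaves as the call above implies; flakyUpload() is a hypothetical stand-in for dataSegmentPusher.push().

import org.apache.druid.java.util.common.RetryUtils;

public class PushRetrySketch {
    private static int attempts = 0;

    // Hypothetical stand-in for dataSegmentPusher.push(): fails twice, then succeeds.
    private static String flakyUpload() throws Exception {
        if (++attempts < 3) {
            throw new java.io.IOException("transient upload failure");
        }
        return "pushed on attempt " + attempts;
    }

    public static void main(String[] args) throws Exception {
        // Retry up to 5 times, retrying on any Exception, mirroring the call in mergeAndPush().
        final String result = RetryUtils.retry(PushRetrySketch::flakyUpload, e -> e instanceof Exception, 5);
        System.out.println(result);
    }
}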

Example 75 with ISE

Use of org.apache.druid.java.util.common.ISE in project druid by druid-io.

From the class ClientQuerySegmentWalker, method getQueryRunnerForIntervals().

@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
    final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
    // Populate the subquery ids for the subqueries present in the main query.
    Query<T> newQuery = query.withDataSource(generateSubqueryIds(query.getDataSource(), query.getId(), query.getSqlQueryId()));
    // Transform TableDataSource to GlobalTableDataSource when eligible, before further transformation to potentially inline.
    final DataSource freeTradeDataSource = globalizeIfPossible(newQuery.getDataSource());
    // do an inlining dry run to see if any inlining is necessary, without actually running the queries.
    final int maxSubqueryRows = QueryContexts.getMaxSubqueryRows(query, serverConfig.getMaxSubqueryRows());
    final DataSource inlineDryRun = inlineIfNecessary(freeTradeDataSource, toolChest, new AtomicInteger(), maxSubqueryRows, true);
    if (!canRunQueryUsingClusterWalker(query.withDataSource(inlineDryRun)) && !canRunQueryUsingLocalWalker(query.withDataSource(inlineDryRun))) {
        // Dry run didn't go well.
        throw new ISE("Cannot handle subquery structure for dataSource: %s", query.getDataSource());
    }
    // Now that we know the structure is workable, actually do the inlining (if necessary).
    newQuery = newQuery.withDataSource(inlineIfNecessary(freeTradeDataSource, toolChest, new AtomicInteger(), maxSubqueryRows, false));
    if (canRunQueryUsingLocalWalker(newQuery)) {
        // No need to decorate since LocalQuerySegmentWalker does its own.
        return new QuerySwappingQueryRunner<>(localClient.getQueryRunnerForIntervals(newQuery, intervals), query, newQuery);
    } else if (canRunQueryUsingClusterWalker(newQuery)) {
        // See https://github.com/apache/druid/issues/9229 for details.
        return new QuerySwappingQueryRunner<>(decorateClusterRunner(newQuery, clusterClient.getQueryRunnerForIntervals(newQuery, intervals)), query, newQuery);
    } else {
        // The dry run above already verified the structure, so we should never reach a query here that can't be run
        // with either the local or cluster walkers. If this message ever shows up it is a bug.
        throw new ISE("Inlined query could not be run");
    }
}
Also used: Query (org.apache.druid.query.Query), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), ISE (org.apache.druid.java.util.common.ISE), DataSource (org.apache.druid.query.DataSource), QueryDataSource (org.apache.druid.query.QueryDataSource), GlobalTableDataSource (org.apache.druid.query.GlobalTableDataSource), InlineDataSource (org.apache.druid.query.InlineDataSource), TableDataSource (org.apache.druid.query.TableDataSource)
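
Stripped of the query machinery, getQueryRunnerForIntervals() is a capability check followed by dispatch, with ISE carrying the diagnostic context when neither walker can handle the query. Here is a minimal sketch of that shape; canRunLocally(), canRunOnCluster(), and the dataSource strings are hypothetical stand-ins, not Druid APIs.

import org.apache.druid.java.util.common.ISE;

public class WalkerDispatchSketch {
    // Hypothetical stand-in for canRunQueryUsingLocalWalker().
    static boolean canRunLocally(String dataSource) {
        return dataSource.startsWith("inline:");
    }

    // Hypothetical stand-in for canRunQueryUsingClusterWalker().
    static boolean canRunOnCluster(String dataSource) {
        return dataSource.startsWith("table:");
    }

    static String chooseWalker(String dataSource) {
        if (canRunLocally(dataSource)) {
            return "local";
        } else if (canRunOnCluster(dataSource)) {
            return "cluster";
        } else {
            // Mirrors the failure path above: name the offending dataSource in the message.
            throw new ISE("Cannot handle subquery structure for dataSource: %s", dataSource);
        }
    }

    public static void main(String[] args) {
        System.out.println(chooseWalker("table:wikipedia"));
        System.out.println(chooseWalker("inline:lookup"));
    }
}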

Aggregations

ISE (org.apache.druid.java.util.common.ISE): 354
IOException (java.io.IOException): 95
ArrayList (java.util.ArrayList): 90
Map (java.util.Map): 68
List (java.util.List): 60
File (java.io.File): 48
Interval (org.joda.time.Interval): 48
DataSegment (org.apache.druid.timeline.DataSegment): 44
HashMap (java.util.HashMap): 43
Nullable (javax.annotation.Nullable): 43
URL (java.net.URL): 36
StatusResponseHolder (org.apache.druid.java.util.http.client.response.StatusResponseHolder): 33
Request (org.apache.druid.java.util.http.client.Request): 30
ExecutionException (java.util.concurrent.ExecutionException): 29
ImmutableMap (com.google.common.collect.ImmutableMap): 28
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 28
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 27
Collectors (java.util.stream.Collectors): 27
IAE (org.apache.druid.java.util.common.IAE): 27
ImmutableList (com.google.common.collect.ImmutableList): 26