Search in sources :

Example 46 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class RealtimePlumber method persistAndMerge.

// Submits persist-n-merge task for a Sink to the mergeExecutor
private void persistAndMerge(final long truncatedTime, final Sink sink) {
    final String threadName = StringUtils.format("%s-%s-persist-n-merge", schema.getDataSource(), DateTimes.utc(truncatedTime));
    mergeExecutor.execute(new ThreadRenamingRunnable(threadName) {

        final Interval interval = sink.getInterval();

        Stopwatch mergeStopwatch = null;

        @Override
        public void doRun() {
            try {
                // Bail out if this sink has been abandoned by a previously-executed task.
                if (sinks.get(truncatedTime) != sink) {
                    log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink);
                    return;
                }
                // Use a file to indicate that pushing has completed.
                final File persistDir = computePersistDir(schema, interval);
                final File mergedTarget = new File(persistDir, "merged");
                final File isPushedMarker = new File(persistDir, "isPushedMarker");
                if (!isPushedMarker.exists()) {
                    removeSegment(sink, mergedTarget);
                    if (mergedTarget.exists()) {
                        log.warn("Merged target[%s] still exists after attempt to delete it; skipping push.", mergedTarget);
                        return;
                    }
                } else {
                    log.info("Already pushed sink[%s]", sink);
                    return;
                }
                /*
            Note: it the plumber crashes after persisting a subset of hydrants then might duplicate data as these
            hydrants will be read but older commitMetadata will be used. fixing this possibly needs structural
            changes to plumber.
             */
                for (FireHydrant hydrant : sink) {
                    synchronized (hydrant) {
                        if (!hydrant.hasSwapped()) {
                            log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
                            final int rowCount = persistHydrant(hydrant, schema, interval, null);
                            metrics.incrementRowOutputCount(rowCount);
                        }
                    }
                }
                final long mergeThreadCpuTime = JvmUtils.safeGetThreadCpuTime();
                mergeStopwatch = Stopwatch.createStarted();
                final File mergedFile;
                List<QueryableIndex> indexes = new ArrayList<>();
                Closer closer = Closer.create();
                try {
                    for (FireHydrant fireHydrant : sink) {
                        Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                        final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                        log.info("Adding hydrant[%s]", fireHydrant);
                        indexes.add(queryableIndex);
                        closer.register(segmentAndCloseable.rhs);
                    }
                    mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), null, mergedTarget, config.getIndexSpec(), config.getIndexSpecForIntermediatePersists(), new BaseProgressIndicator(), config.getSegmentWriteOutMediumFactory(), -1);
                } catch (Throwable t) {
                    throw closer.rethrow(t);
                } finally {
                    closer.close();
                }
                // emit merge metrics before publishing segment
                metrics.incrementMergeCpuTime(JvmUtils.safeGetThreadCpuTime() - mergeThreadCpuTime);
                metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS));
                log.info("Pushing [%s] to deep storage", sink.getSegment().getId());
                DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())), false);
                log.info("Inserting [%s] to the metadata store", sink.getSegment().getId());
                segmentPublisher.publishSegment(segment);
                if (!isPushedMarker.createNewFile()) {
                    log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource()).addData("interval", sink.getInterval()).addData("partitionNum", segment.getShardSpec().getPartitionNum()).addData("marker", isPushedMarker).emit();
                }
            } catch (Exception e) {
                metrics.incrementFailedHandoffs();
                log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()).addData("interval", interval).emit();
                if (shuttingDown) {
                    // We're trying to shut down, and this segment failed to push. Let's just get rid of it.
                    // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                    cleanShutdown = false;
                    abandonSegment(truncatedTime, sink);
                }
            } finally {
                if (mergeStopwatch != null) {
                    mergeStopwatch.stop();
                }
            }
        }
    });
    handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()), mergeExecutor, new Runnable() {

        @Override
        public void run() {
            abandonSegment(sink.getInterval().getStartMillis(), sink);
            metrics.incrementHandOffCount();
        }
    });
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) Stopwatch(com.google.common.base.Stopwatch) DataSegment(org.apache.druid.timeline.DataSegment) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) QueryableIndex(org.apache.druid.segment.QueryableIndex) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ThreadRenamingRunnable(org.apache.druid.common.guava.ThreadRenamingRunnable) List(java.util.List) ArrayList(java.util.ArrayList) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) ThreadRenamingRunnable(org.apache.druid.common.guava.ThreadRenamingRunnable) File(java.io.File) Interval(org.joda.time.Interval) Pair(org.apache.druid.java.util.common.Pair) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator)

Example 47 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class ApproximateHistogramGroupByQueryTest method constructorFeeder.

@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> constructorFeeder() {
    final GroupByQueryConfig v1Config = new GroupByQueryConfig() {

        @Override
        public String getDefaultStrategy() {
            return GroupByStrategySelector.STRATEGY_V1;
        }

        @Override
        public String toString() {
            return "v1";
        }
    };
    final GroupByQueryConfig v1SingleThreadedConfig = new GroupByQueryConfig() {

        @Override
        public boolean isSingleThreaded() {
            return true;
        }

        @Override
        public String getDefaultStrategy() {
            return GroupByStrategySelector.STRATEGY_V1;
        }

        @Override
        public String toString() {
            return "v1SingleThreaded";
        }
    };
    final GroupByQueryConfig v2Config = new GroupByQueryConfig() {

        @Override
        public String getDefaultStrategy() {
            return GroupByStrategySelector.STRATEGY_V2;
        }

        @Override
        public String toString() {
            return "v2";
        }
    };
    v1Config.setMaxIntermediateRows(10000);
    v1SingleThreadedConfig.setMaxIntermediateRows(10000);
    final List<Object[]> constructors = new ArrayList<>();
    final List<GroupByQueryConfig> configs = ImmutableList.of(v1Config, v1SingleThreadedConfig, v2Config);
    for (GroupByQueryConfig config : configs) {
        final Pair<GroupByQueryRunnerFactory, Closer> factoryAndCloser = GroupByQueryRunnerTest.makeQueryRunnerFactory(config);
        final GroupByQueryRunnerFactory factory = factoryAndCloser.lhs;
        RESOURCE_CLOSER.register(factoryAndCloser.rhs);
        for (QueryRunner<ResultRow> runner : QueryRunnerTestHelper.makeQueryRunners(factory)) {
            final String testName = StringUtils.format("config=%s, runner=%s", config.toString(), runner.toString());
            constructors.add(new Object[] { testName, factory, runner });
        }
    }
    return constructors;
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) ResultRow(org.apache.druid.query.groupby.ResultRow) GroupByQueryRunnerFactory(org.apache.druid.query.groupby.GroupByQueryRunnerFactory) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) ArrayList(java.util.ArrayList)

Example 48 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class HashJoinSegmentStorageAdapter method makeCursors.

@Override
public Sequence<Cursor> makeCursors(@Nullable final Filter filter, @Nonnull final Interval interval, @Nonnull final VirtualColumns virtualColumns, @Nonnull final Granularity gran, final boolean descending, @Nullable final QueryMetrics<?> queryMetrics) {
    final Filter combinedFilter = baseFilterAnd(filter);
    if (clauses.isEmpty()) {
        return baseAdapter.makeCursors(combinedFilter, interval, virtualColumns, gran, descending, queryMetrics);
    }
    // Filter pre-analysis key implied by the call to "makeCursors". We need to sanity-check that it matches
    // the actual pre-analysis that was done. Note: we can't infer a rewrite config from the "makeCursors" call (it
    // requires access to the query context) so we'll need to skip sanity-checking it, by re-using the one present
    // in the cached key.)
    final JoinFilterPreAnalysisKey keyIn = new JoinFilterPreAnalysisKey(joinFilterPreAnalysis.getKey().getRewriteConfig(), clauses, virtualColumns, combinedFilter);
    final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey();
    if (!keyIn.equals(keyCached)) {
        // It is a bug if this happens. The implied key and the cached key should always match.
        throw new ISE("Pre-analysis mismatch, cannot execute query");
    }
    final List<VirtualColumn> preJoinVirtualColumns = new ArrayList<>();
    final List<VirtualColumn> postJoinVirtualColumns = new ArrayList<>();
    determineBaseColumnsWithPreAndPostJoinVirtualColumns(virtualColumns, preJoinVirtualColumns, postJoinVirtualColumns);
    // We merge the filter on base table specified by the user and filter on the base table that is pushed from
    // the join
    JoinFilterSplit joinFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter);
    preJoinVirtualColumns.addAll(joinFilterSplit.getPushDownVirtualColumns());
    final Sequence<Cursor> baseCursorSequence = baseAdapter.makeCursors(joinFilterSplit.getBaseTableFilter().isPresent() ? joinFilterSplit.getBaseTableFilter().get() : null, interval, VirtualColumns.create(preJoinVirtualColumns), gran, descending, queryMetrics);
    Closer joinablesCloser = Closer.create();
    return Sequences.<Cursor, Cursor>map(baseCursorSequence, cursor -> {
        assert cursor != null;
        Cursor retVal = cursor;
        for (JoinableClause clause : clauses) {
            retVal = HashJoinEngine.makeJoinCursor(retVal, clause, descending, joinablesCloser);
        }
        return PostJoinCursor.wrap(retVal, VirtualColumns.create(postJoinVirtualColumns), joinFilterSplit.getJoinTableFilter().orElse(null));
    }).withBaggage(joinablesCloser);
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) Indexed(org.apache.druid.segment.data.Indexed) Arrays(java.util.Arrays) Granularity(org.apache.druid.java.util.common.granularity.Granularity) QueryMetrics(org.apache.druid.query.QueryMetrics) Metadata(org.apache.druid.segment.Metadata) StorageAdapter(org.apache.druid.segment.StorageAdapter) ArrayList(java.util.ArrayList) JoinFilterSplit(org.apache.druid.segment.join.filter.JoinFilterSplit) HashSet(java.util.HashSet) VectorCursor(org.apache.druid.segment.vector.VectorCursor) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ListIndexed(org.apache.druid.segment.data.ListIndexed) JoinFilterPreAnalysisKey(org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey) Nonnull(javax.annotation.Nonnull) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) LinkedHashSet(java.util.LinkedHashSet) Sequence(org.apache.druid.java.util.common.guava.Sequence) VirtualColumns(org.apache.druid.segment.VirtualColumns) Closer(org.apache.druid.java.util.common.io.Closer) VirtualColumn(org.apache.druid.segment.VirtualColumn) DateTime(org.joda.time.DateTime) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) JoinFilterPreAnalysis(org.apache.druid.segment.join.filter.JoinFilterPreAnalysis) List(java.util.List) Cursor(org.apache.druid.segment.Cursor) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Optional(java.util.Optional) JoinFilterAnalyzer(org.apache.druid.segment.join.filter.JoinFilterAnalyzer) Filters(org.apache.druid.segment.filter.Filters) Filter(org.apache.druid.query.filter.Filter) Filter(org.apache.druid.query.filter.Filter) JoinFilterPreAnalysisKey(org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey) JoinFilterSplit(org.apache.druid.segment.join.filter.JoinFilterSplit) ArrayList(java.util.ArrayList) ISE(org.apache.druid.java.util.common.ISE) VirtualColumn(org.apache.druid.segment.VirtualColumn) VectorCursor(org.apache.druid.segment.vector.VectorCursor) Cursor(org.apache.druid.segment.Cursor)

Example 49 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class SegmentWriteOutMediumTest method testChildCloseFreesResourcesButNotParents.

@Test
public void testChildCloseFreesResourcesButNotParents() throws IOException {
    WriteOutBytes bytes1 = medium.makeWriteOutBytes();
    WriteOutBytes bytes2 = medium.makeWriteOutBytes();
    Assert.assertTrue(bytes1.isOpen());
    Assert.assertTrue(bytes2.isOpen());
    SegmentWriteOutMedium childMedium = medium.makeChildWriteOutMedium();
    Assert.assertTrue(childMedium.getClass().equals(medium.getClass()));
    WriteOutBytes bytes3 = childMedium.makeWriteOutBytes();
    WriteOutBytes bytes4 = childMedium.makeWriteOutBytes();
    Assert.assertTrue(bytes3.isOpen());
    Assert.assertTrue(bytes4.isOpen());
    Closer childCloser = childMedium.getCloser();
    childCloser.close();
    Assert.assertFalse(bytes3.isOpen());
    Assert.assertFalse(bytes4.isOpen());
    Assert.assertTrue(bytes1.isOpen());
    Assert.assertTrue(bytes2.isOpen());
    Closer closer = medium.getCloser();
    closer.close();
    Assert.assertFalse(bytes1.isOpen());
    Assert.assertFalse(bytes2.isOpen());
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) Test(org.junit.Test)

Example 50 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class AppenderatorImpl method mergeAndPush.

/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
 * be run in the single-threaded pushExecutor.
 *
 * @param identifier    sink identifier
 * @param sink          sink to push
 * @param useUniquePath true if the segment should be written to a path with a unique identifier
 *
 * @return segment descriptor, or null if the sink is no longer valid
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink, final boolean useUniquePath) {
    // noinspection ObjectEquality
    if (sinks.get(identifier) != sink) {
        log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
        return null;
    }
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    for (FireHydrant hydrant : sink) {
        if (sink.isWritable()) {
            throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
        }
        synchronized (hydrant) {
            if (!hydrant.hasSwapped()) {
                throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
            }
        }
    }
    try {
        if (descriptorFile.exists()) {
            if (useUniquePath) {
                // Don't reuse the descriptor, because the caller asked for a unique path. Leave the old one as-is, since
                // it might serve some unknown purpose.
                log.debug("Segment[%s] already pushed, but we want a unique path, so will push again with a new path.", identifier);
            } else {
                log.info("Segment[%s] already pushed, skipping.", identifier);
                return objectMapper.readValue(descriptorFile, DataSegment.class);
            }
        }
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        final File mergedFile;
        final long mergeFinishTime;
        final long startTime = System.nanoTime();
        List<QueryableIndex> indexes = new ArrayList<>();
        Closer closer = Closer.create();
        try {
            for (FireHydrant fireHydrant : sink) {
                // if batch, swap/persist did not memory map the incremental index, we need it mapped now:
                if (!isOpenSegments()) {
                    // sanity
                    Pair<File, SegmentId> persistedMetadata = persistedHydrantMetadata.get(fireHydrant);
                    if (persistedMetadata == null) {
                        throw new ISE("Persisted metadata for batch hydrant [%s] is null!", fireHydrant);
                    }
                    File persistedFile = persistedMetadata.lhs;
                    SegmentId persistedSegmentId = persistedMetadata.rhs;
                    // sanity:
                    if (persistedFile == null) {
                        throw new ISE("Persisted file for batch hydrant [%s] is null!", fireHydrant);
                    } else if (persistedSegmentId == null) {
                        throw new ISE("Persisted segmentId for batch hydrant in file [%s] is null!", persistedFile.getPath());
                    }
                    fireHydrant.swapSegment(new QueryableIndexSegment(indexIO.loadIndex(persistedFile), persistedSegmentId));
                }
                Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
                indexes.add(queryableIndex);
                closer.register(segmentAndCloseable.rhs);
            }
            mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), schema.getDimensionsSpec(), mergedTarget, tuningConfig.getIndexSpec(), tuningConfig.getIndexSpecForIntermediatePersists(), new BaseProgressIndicator(), tuningConfig.getSegmentWriteOutMediumFactory(), tuningConfig.getMaxColumnsToMerge());
            mergeFinishTime = System.nanoTime();
            log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
        final DataSegment segmentToPush = sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec()));
        // Retry pushing segments because uploading to deep storage might fail especially for cloud storage types
        final DataSegment segment = RetryUtils.retry(// semantics.
        () -> dataSegmentPusher.push(mergedFile, segmentToPush, useUniquePath), exception -> exception instanceof Exception, 5);
        if (!isOpenSegments()) {
            // can generate OOMs during merge if enough of them are held back...
            for (FireHydrant fireHydrant : sink) {
                fireHydrant.swapSegment(null);
            }
        }
        final long pushFinishTime = System.nanoTime();
        objectMapper.writeValue(descriptorFile, segment);
        log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw new RuntimeException(e);
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) SegmentId(org.apache.druid.timeline.SegmentId) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Nullable(javax.annotation.Nullable)

Aggregations

Closer (org.apache.druid.java.util.common.io.Closer)58 IOException (java.io.IOException)21 ArrayList (java.util.ArrayList)17 File (java.io.File)12 ISE (org.apache.druid.java.util.common.ISE)10 List (java.util.List)9 GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig)8 ByteBuffer (java.nio.ByteBuffer)7 Nullable (javax.annotation.Nullable)7 QueryableIndex (org.apache.druid.segment.QueryableIndex)7 Test (org.junit.Test)7 GroupByQueryRunnerFactory (org.apache.druid.query.groupby.GroupByQueryRunnerFactory)6 ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment)6 ImmutableList (com.google.common.collect.ImmutableList)5 Map (java.util.Map)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4 Closeable (java.io.Closeable)4 ExecutionException (java.util.concurrent.ExecutionException)4 CloseableIterator (org.apache.druid.java.util.common.parsers.CloseableIterator)4 BaseProgressIndicator (org.apache.druid.segment.BaseProgressIndicator)4