
Example 51 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class BatchAppenderator method mergeAndPush.

/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted.
 *
 * @param identifier    sink identifier
 * @param sink          sink to push
 * @return segment descriptor, or null if the sink is no longer valid
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink) {
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    if (sink.isWritable()) {
        throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
    }
    int numHydrants = 0;
    for (FireHydrant hydrant : sink) {
        if (!hydrant.hasSwapped()) {
            throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
        }
        numHydrants++;
    }
    SinkMetadata sm = sinksMetadata.get(identifier);
    if (sm == null) {
        log.warn("Sink metadata not found just before merge for identifier [%s]", identifier);
    } else if (numHydrants != sm.getNumHydrants()) {
        throw new ISE("Number of restored hydrants[%d] for identifier[%s] does not match expected value[%d]", numHydrants, identifier, sm.getNumHydrants());
    }
    try {
        if (descriptorFile.exists()) {
            // Already pushed.
            log.info("Segment[%s] already pushed, skipping.", identifier);
            return objectMapper.readValue(descriptorFile, DataSegment.class);
        }
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        final File mergedFile;
        final long mergeFinishTime;
        final long startTime = System.nanoTime();
        List<QueryableIndex> indexes = new ArrayList<>();
        Closer closer = Closer.create();
        try {
            for (FireHydrant fireHydrant : sink) {
                Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
                indexes.add(queryableIndex);
                closer.register(segmentAndCloseable.rhs);
            }
            mergedFile = indexMerger.mergeQueryableIndex(
                indexes,
                schema.getGranularitySpec().isRollup(),
                schema.getAggregators(),
                schema.getDimensionsSpec(),
                mergedTarget,
                tuningConfig.getIndexSpec(),
                tuningConfig.getIndexSpecForIntermediatePersists(),
                new BaseProgressIndicator(),
                tuningConfig.getSegmentWriteOutMediumFactory(),
                tuningConfig.getMaxColumnsToMerge()
            );
            mergeFinishTime = System.nanoTime();
            log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
        // Retry pushing segments because uploading to deep storage might fail especially for cloud storage types
        final DataSegment segment = RetryUtils.retry(
            // This appenderator is used only for the local indexing task so unique paths are not required
            () -> dataSegmentPusher.push(
                mergedFile,
                sink.getSegment().withDimensions(
                    IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())
                ),
                false
            ),
            exception -> exception instanceof Exception,
            5
        );
        // Drop the queryable indexes behind the hydrants; holding their mapped file references
        // can generate OOMs during merge if enough of them are held back...
        for (FireHydrant fireHydrant : sink) {
            fireHydrant.swapSegment(null);
        }
        // cleanup, sink no longer needed
        removeDirectory(computePersistDir(identifier));
        final long pushFinishTime = System.nanoTime();
        log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw new RuntimeException(e);
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Nullable(javax.annotation.Nullable)
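
The try/catch/finally shape above is the standard Closer idiom in Druid code: register resources as they are acquired, route any failure through rethrow() so close() adds suppressed exceptions instead of masking the primary one, and close everything in finally. Below is a minimal, self-contained sketch of that idiom; the class name and the printing resources are illustrative, and only the Closer calls (create, register, rethrow, close) are taken from the snippet.

import org.apache.druid.java.util.common.io.Closer;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class CloserRethrowSketch {

    public static void main(String[] args) throws IOException {
        final Closer closer = Closer.create();
        final List<String> merged = new ArrayList<>();
        try {
            // Register each resource as soon as it is acquired, the way the hydrant
            // segments above are registered before the merge runs.
            for (int i = 0; i < 3; i++) {
                final int id = i;
                final Closeable resource = () -> System.out.println("closed resource " + id);
                closer.register(resource);
                merged.add("resource-" + id);
            }
            System.out.println("merged " + merged.size() + " resources");
        } catch (Throwable t) {
            // rethrow() records the primary failure so close() cannot mask it.
            throw closer.rethrow(t);
        } finally {
            // Closes every registered resource.
            closer.close();
        }
    }
}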

Example 52 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class InputSourceSampler method sample.

public SamplerResponse sample(
    final InputSource inputSource,
    // inputFormat can be null only if inputSource.needsFormat() = false or parser is specified.
    @Nullable final InputFormat inputFormat,
    @Nullable final DataSchema dataSchema,
    @Nullable final SamplerConfig samplerConfig
) {
    Preconditions.checkNotNull(inputSource, "inputSource required");
    if (inputSource.needsFormat()) {
        Preconditions.checkNotNull(inputFormat, "inputFormat required");
    }
    final DataSchema nonNullDataSchema = dataSchema == null ? DEFAULT_DATA_SCHEMA : dataSchema;
    final SamplerConfig nonNullSamplerConfig = samplerConfig == null ? SamplerConfig.empty() : samplerConfig;
    final Closer closer = Closer.create();
    final File tempDir = FileUtils.createTempDir();
    closer.register(() -> FileUtils.deleteDirectory(tempDir));
    try {
        final InputSourceReader reader = buildReader(nonNullSamplerConfig, nonNullDataSchema, inputSource, inputFormat, tempDir);
        try (final CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample();
            final IncrementalIndex index = buildIncrementalIndex(nonNullSamplerConfig, nonNullDataSchema);
            final Closer closer1 = closer) {
            List<SamplerResponseRow> responseRows = new ArrayList<>(nonNullSamplerConfig.getNumRows());
            int numRowsIndexed = 0;
            while (responseRows.size() < nonNullSamplerConfig.getNumRows() && iterator.hasNext()) {
                final InputRowListPlusRawValues inputRowListPlusRawValues = iterator.next();
                final List<Map<String, Object>> rawColumnsList = inputRowListPlusRawValues.getRawValuesList();
                final ParseException parseException = inputRowListPlusRawValues.getParseException();
                if (parseException != null) {
                    if (rawColumnsList != null) {
                        // add all rows to response
                        responseRows.addAll(rawColumnsList.stream().map(rawColumns -> new SamplerResponseRow(rawColumns, null, true, parseException.getMessage())).collect(Collectors.toList()));
                    } else {
                        // no data parsed, add one response row
                        responseRows.add(new SamplerResponseRow(null, null, true, parseException.getMessage()));
                    }
                    continue;
                }
                List<InputRow> inputRows = inputRowListPlusRawValues.getInputRows();
                if (inputRows == null) {
                    continue;
                }
                for (int i = 0; i < inputRows.size(); i++) {
                    // InputRowListPlusRawValues guarantees the size of rawColumnsList and inputRows are the same
                    Map<String, Object> rawColumns = rawColumnsList == null ? null : rawColumnsList.get(i);
                    InputRow row = inputRows.get(i);
                    // keep the index of the row to be added to responseRows for further use
                    final int rowIndex = responseRows.size();
                    IncrementalIndexAddResult addResult = index.add(new SamplerInputRow(row, rowIndex), true);
                    if (addResult.hasParseException()) {
                        responseRows.add(new SamplerResponseRow(rawColumns, null, true, addResult.getParseException().getMessage()));
                    } else {
                        // store the raw value; will be merged with the data from the IncrementalIndex later
                        responseRows.add(new SamplerResponseRow(rawColumns, null, null, null));
                        numRowsIndexed++;
                    }
                }
            }
            final List<String> columnNames = index.getColumnNames();
            columnNames.remove(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
            for (Row row : index) {
                Map<String, Object> parsed = new LinkedHashMap<>();
                parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch());
                columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));
                Number sortKey = row.getMetric(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
                if (sortKey != null) {
                    responseRows.set(sortKey.intValue(), responseRows.get(sortKey.intValue()).withParsed(parsed));
                }
            }
            // make sure size of responseRows meets the input
            if (responseRows.size() > nonNullSamplerConfig.getNumRows()) {
                responseRows = responseRows.subList(0, nonNullSamplerConfig.getNumRows());
            }
            int numRowsRead = responseRows.size();
            return new SamplerResponse(
                numRowsRead,
                numRowsIndexed,
                responseRows.stream()
                            .filter(Objects::nonNull)
                            .filter(x -> x.getParsed() != null || x.isUnparseable() != null)
                            .collect(Collectors.toList())
            );
        }
    } catch (Exception e) {
        throw new SamplerException(e, "Failed to sample data: %s", e.getMessage());
    }
}
Also used : ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Closer(org.apache.druid.java.util.common.io.Closer) InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) ParseException(org.apache.druid.java.util.common.parsers.ParseException) DataSchema(org.apache.druid.segment.indexing.DataSchema) InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimedShutoffInputSourceReader(org.apache.druid.data.input.impl.TimedShutoffInputSourceReader) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) InputRow(org.apache.druid.data.input.InputRow) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Row(org.apache.druid.data.input.Row) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InputRow(org.apache.druid.data.input.InputRow) File(java.io.File) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
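
A detail worth noting above: the temp-directory cleanup is registered as a lambda on the Closer, and the Closer itself is then closed through try-with-resources via the closer1 alias. A minimal sketch of that cleanup pattern follows; it uses java.nio.file instead of Druid's FileUtils purely for illustration, and the class name is made up.

import org.apache.druid.java.util.common.io.Closer;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class TempDirCleanupSketch {

    public static void main(String[] args) throws IOException {
        final Closer closer = Closer.create();
        final Path tempDir = Files.createTempDirectory("sampler");
        // register() accepts any Closeable, so a cleanup lambda works.
        closer.register(() -> Files.deleteIfExists(tempDir));
        try (Closer ignored = closer) {
            System.out.println("sampling into " + tempDir);
            // ... read rows, build an index, assemble the response ...
        }
        // The temp directory has been deleted here, even if the work above threw.
    }
}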

Example 53 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class QueryableIndexCursorSequenceBuilder method build.

public Sequence<Cursor> build(final Granularity gran) {
    final Offset baseOffset;
    if (filterBitmap == null) {
        baseOffset = descending ? new SimpleDescendingOffset(index.getNumRows()) : new SimpleAscendingOffset(index.getNumRows());
    } else {
        baseOffset = BitmapOffset.of(filterBitmap, descending, index.getNumRows());
    }
    // Column caches shared amongst all cursors in this sequence.
    final Map<String, BaseColumn> columnCache = new HashMap<>();
    final NumericColumn timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn();
    final Closer closer = Closer.create();
    closer.register(timestamps);
    Iterable<Interval> iterable = gran.getIterable(interval);
    if (descending) {
        iterable = Lists.reverse(ImmutableList.copyOf(iterable));
    }
    return Sequences.withBaggage(Sequences.map(Sequences.simple(iterable), new Function<Interval, Cursor>() {

        @Override
        public Cursor apply(final Interval inputInterval) {
            final long timeStart = Math.max(interval.getStartMillis(), inputInterval.getStartMillis());
            final long timeEnd = Math.min(interval.getEndMillis(), gran.increment(inputInterval.getStartMillis()));
            if (descending) {
                for (; baseOffset.withinBounds(); baseOffset.increment()) {
                    if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) < timeEnd) {
                        break;
                    }
                }
            } else {
                for (; baseOffset.withinBounds(); baseOffset.increment()) {
                    if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) >= timeStart) {
                        break;
                    }
                }
            }
            final Offset offset = descending
                                  ? new DescendingTimestampCheckingOffset(baseOffset, timestamps, timeStart, minDataTimestamp >= timeStart)
                                  : new AscendingTimestampCheckingOffset(baseOffset, timestamps, timeEnd, maxDataTimestamp < timeEnd);
            final Offset baseCursorOffset = offset.clone();
            final ColumnSelectorFactory columnSelectorFactory = new QueryableIndexColumnSelectorFactory(index, virtualColumns, descending, closer, baseCursorOffset.getBaseReadableOffset(), columnCache);
            final DateTime myBucket = gran.toDateTime(inputInterval.getStartMillis());
            if (postFilter == null) {
                return new QueryableIndexCursor(baseCursorOffset, columnSelectorFactory, myBucket);
            } else {
                FilteredOffset filteredOffset = new FilteredOffset(baseCursorOffset, columnSelectorFactory, descending, postFilter, bitmapIndexSelector);
                return new QueryableIndexCursor(filteredOffset, columnSelectorFactory, myBucket);
            }
        }
    }), closer);
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) QueryableIndexVectorColumnSelectorFactory(org.apache.druid.segment.vector.QueryableIndexVectorColumnSelectorFactory) VectorColumnSelectorFactory(org.apache.druid.segment.vector.VectorColumnSelectorFactory) HashMap(java.util.HashMap) DateTime(org.joda.time.DateTime) Offset(org.apache.druid.segment.data.Offset) ReadableOffset(org.apache.druid.segment.data.ReadableOffset) FilteredVectorOffset(org.apache.druid.segment.vector.FilteredVectorOffset) BitmapVectorOffset(org.apache.druid.segment.vector.BitmapVectorOffset) NoFilterVectorOffset(org.apache.druid.segment.vector.NoFilterVectorOffset) VectorOffset(org.apache.druid.segment.vector.VectorOffset) Function(com.google.common.base.Function) NumericColumn(org.apache.druid.segment.column.NumericColumn) BaseColumn(org.apache.druid.segment.column.BaseColumn) Interval(org.joda.time.Interval)
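
Here a single Closer owns the timestamp column (and can carry anything else the cursors open), and Sequences.withBaggage guarantees it is closed once the returned sequence has been consumed. A stripped-down sketch of that baggage pattern, with plain strings standing in for cursors:

import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.java.util.common.io.Closer;

import java.util.Arrays;

public class BaggageSketch {

    public static void main(String[] args) {
        final Closer closer = Closer.create();
        closer.register(() -> System.out.println("shared column resources released"));

        final Sequence<String> cursors = Sequences.withBaggage(
            Sequences.simple(Arrays.asList("cursor-0", "cursor-1")),
            closer
        );

        // Draining the sequence triggers the baggage: the Closer is closed afterwards.
        final int consumed = cursors.accumulate(0, (count, cursor) -> count + 1);
        System.out.println("consumed " + consumed + " cursors");
    }
}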

Example 54 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class ReferenceCountingCloseableObject method incrementReferenceAndDecrementOnceCloseable.

/**
 * Returns an {@link Optional} of a {@link Closeable} from {@link #decrementOnceCloseable}, if it is able to
 * successfully {@link #increment}, else nothing indicating that the reference could not be acquired.
 */
public Optional<Closeable> incrementReferenceAndDecrementOnceCloseable() {
    final Closer closer;
    if (increment()) {
        closer = Closer.create();
        closer.register(decrementOnceCloseable());
    } else {
        closer = null;
    }
    return Optional.ofNullable(closer);
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer)
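
Callers read the returned Optional as "present means the reference is now held and must be released". A hedged sketch of that consumption pattern is below; tryAcquire is a made-up stand-in for incrementReferenceAndDecrementOnceCloseable(), not the real API.

import java.io.Closeable;
import java.io.IOException;
import java.util.Optional;

public class AcquireReleaseSketch {

    // Stand-in for incrementReferenceAndDecrementOnceCloseable(): present while the
    // underlying object can still be acquired, empty once it has been closed.
    static Optional<Closeable> tryAcquire(boolean stillOpen) {
        if (!stillOpen) {
            return Optional.empty();
        }
        final Closeable release = () -> System.out.println("reference released");
        return Optional.of(release);
    }

    public static void main(String[] args) throws IOException {
        final Optional<Closeable> reference = tryAcquire(true);
        if (!reference.isPresent()) {
            System.out.println("could not acquire the reference; skipping the work");
            return;
        }
        try (Closeable ignored = reference.get()) {
            System.out.println("doing work while the reference is held");
        }
    }
}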

Example 55 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class AggregationTestHelper method createTopNQueryAggregationTestHelper.

public static AggregationTestHelper createTopNQueryAggregationTestHelper(List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder) {
    ObjectMapper mapper = TestHelper.makeJsonMapper();
    TopNQueryQueryToolChest toolchest = new TopNQueryQueryToolChest(new TopNQueryConfig());
    final CloseableStupidPool<ByteBuffer> pool = new CloseableStupidPool<>("TopNQueryRunnerFactory-bufferPool", new Supplier<ByteBuffer>() {

        @Override
        public ByteBuffer get() {
            return ByteBuffer.allocate(10 * 1024 * 1024);
        }
    });
    final Closer resourceCloser = Closer.create();
    TopNQueryRunnerFactory factory = new TopNQueryRunnerFactory(pool, toolchest, QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {

        @Override
        public int columnCacheSizeBytes() {
            return 0;
        }
    });
    return new AggregationTestHelper(
        mapper,
        new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance()),
        indexIO,
        toolchest,
        factory,
        tempFolder,
        jsonModulesToRegister,
        resourceCloser,
        Collections.emptyMap()
    );
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) ColumnConfig(org.apache.druid.segment.column.ColumnConfig) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) CloseableStupidPool(org.apache.druid.collections.CloseableStupidPool) ByteBuffer(java.nio.ByteBuffer) TopNQueryConfig(org.apache.druid.query.topn.TopNQueryConfig) IndexIO(org.apache.druid.segment.IndexIO) TopNQueryRunnerFactory(org.apache.druid.query.topn.TopNQueryRunnerFactory) TopNQueryQueryToolChest(org.apache.druid.query.topn.TopNQueryQueryToolChest) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)
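
The resourceCloser created here is handed to the helper, presumably so that test-scoped resources can be released with a single close() in teardown. A minimal, illustrative sketch of that ownership pattern follows; the printing Closeable stands in for something like the buffer pool, and the class name is made up.

import org.apache.druid.java.util.common.io.Closer;

import java.io.Closeable;
import java.io.IOException;

public class TestResourceCloserSketch {

    public static void main(String[] args) throws IOException {
        final Closer resourceCloser = Closer.create();
        final Closeable bufferPool = () -> System.out.println("buffer pool freed");
        resourceCloser.register(bufferPool);
        // ... construct the helper with resourceCloser and run the test queries ...
        // A single close() in teardown releases every registered resource.
        resourceCloser.close();
    }
}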

Aggregations

Closer (org.apache.druid.java.util.common.io.Closer) 58
IOException (java.io.IOException) 21
ArrayList (java.util.ArrayList) 17
File (java.io.File) 12
ISE (org.apache.druid.java.util.common.ISE) 10
List (java.util.List) 9
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig) 8
ByteBuffer (java.nio.ByteBuffer) 7
Nullable (javax.annotation.Nullable) 7
QueryableIndex (org.apache.druid.segment.QueryableIndex) 7
Test (org.junit.Test) 7
GroupByQueryRunnerFactory (org.apache.druid.query.groupby.GroupByQueryRunnerFactory) 6
ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment) 6
ImmutableList (com.google.common.collect.ImmutableList) 5
Map (java.util.Map) 5
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 4
Closeable (java.io.Closeable) 4
ExecutionException (java.util.concurrent.ExecutionException) 4
CloseableIterator (org.apache.druid.java.util.common.parsers.CloseableIterator) 4
BaseProgressIndicator (org.apache.druid.segment.BaseProgressIndicator) 4