
Example 41 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class OrcReader method intermediateRowIterator.

@Override
protected CloseableIterator<OrcStruct> intermediateRowIterator() throws IOException {
    final Closer closer = Closer.create();
    // We fetch here to cache a copy locally. However, this might need to be changed if we want to split an orc file
    // into several InputSplits in the future.
    final byte[] buffer = new byte[InputEntity.DEFAULT_FETCH_BUFFER_SIZE];
    final CleanableFile file = closer.register(source.fetch(temporaryDirectory, buffer));
    final Path path = new Path(file.file().toURI());
    final ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader();
    final Reader reader;
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        reader = closer.register(OrcFile.createReader(path, OrcFile.readerOptions(conf)));
    } finally {
        Thread.currentThread().setContextClassLoader(currentClassLoader);
    }
    // The line below gets the schema for reading all columns.
    // This could be improved in the future by projecting only the columns users actually want.
    final TypeDescription schema = reader.getSchema();
    final RecordReader batchReader = reader.rows(reader.options());
    final OrcMapredRecordReader<OrcStruct> recordReader = new OrcMapredRecordReader<>(batchReader, schema);
    closer.register(recordReader::close);
    return new CloseableIterator<OrcStruct>() {

        final NullWritable key = recordReader.createKey();

        OrcStruct value = null;

        @Override
        public boolean hasNext() {
            if (value == null) {
                try {
                    // The returned OrcStruct in next() can be kept in memory for a while.
                    // Here, we create a new instance of OrcStruct before calling RecordReader.next(),
                    // so that we avoid sharing the same reference to the "value" across rows.
                    value = recordReader.createValue();
                    if (!recordReader.next(key, value)) {
                        value = null;
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return value != null;
        }

        @Override
        public OrcStruct next() {
            if (value == null) {
                throw new NoSuchElementException();
            }
            final OrcStruct currentValue = value;
            value = null;
            return currentValue;
        }

        @Override
        public void close() throws IOException {
            closer.close();
        }
    };
}
Also used: Closer(org.apache.druid.java.util.common.io.Closer) Path(org.apache.hadoop.fs.Path) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) RecordReader(org.apache.orc.RecordReader) OrcMapredRecordReader(org.apache.orc.mapred.OrcMapredRecordReader) Reader(org.apache.orc.Reader) IntermediateRowParsingReader(org.apache.druid.data.input.IntermediateRowParsingReader) IOException(java.io.IOException) NullWritable(org.apache.hadoop.io.NullWritable) OrcStruct(org.apache.orc.mapred.OrcStruct) TypeDescription(org.apache.orc.TypeDescription) CleanableFile(org.apache.druid.data.input.InputEntity.CleanableFile) NoSuchElementException(java.util.NoSuchElementException)
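The shape worth noting above: three independent resources (the fetched temporary file, the ORC Reader, and the record reader) all hang off one Closer, and cleanup is deferred to the returned iterator's close(). Druid's Closer is adapted from Guava's class of the same name and closes registered resources in reverse (LIFO) registration order. Below is a minimal sketch of the semantics such a guard provides, written against plain java.io for illustration; it is not Druid's actual implementation.

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Deque;

// Illustrative re-implementation of the Closer contract used above.
public class CloserSketch implements Closeable {
    private final Deque<Closeable> resources = new ArrayDeque<>();

    // register() returns its argument so calls can be inlined, as in
    // closer.register(OrcFile.createReader(...)) above.
    public <C extends Closeable> C register(C closeable) {
        resources.addFirst(closeable); // last registered is closed first (LIFO)
        return closeable;
    }

    @Override
    public void close() throws IOException {
        IOException first = null;
        while (!resources.isEmpty()) {
            try {
                resources.removeFirst().close();
            } catch (IOException e) {
                if (first == null) {
                    first = e;
                } else {
                    first.addSuppressed(e); // later failures don't mask the first
                }
            }
        }
        if (first != null) {
            throw first;
        }
    }
}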

Example 42 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class PartialSegmentMergeTask method mergeSegmentsInSamePartition.

private static Pair<File, List<String>> mergeSegmentsInSamePartition(DataSchema dataSchema, ParallelIndexTuningConfig tuningConfig, IndexIO indexIO, IndexMergerV9 merger, List<File> indexes, int maxNumSegmentsToMerge, File baseOutDir, int outDirSuffix) throws IOException {
    int suffix = outDirSuffix;
    final List<File> mergedFiles = new ArrayList<>();
    List<String> dimensionNames = null;
    for (int i = 0; i < indexes.size(); i += maxNumSegmentsToMerge) {
        final List<File> filesToMerge = indexes.subList(i, Math.min(i + maxNumSegmentsToMerge, indexes.size()));
        final List<QueryableIndex> indexesToMerge = new ArrayList<>(filesToMerge.size());
        final Closer indexCleaner = Closer.create();
        for (File file : filesToMerge) {
            final QueryableIndex queryableIndex = indexIO.loadIndex(file);
            indexesToMerge.add(queryableIndex);
            indexCleaner.register(() -> {
                queryableIndex.close();
                file.delete();
            });
        }
        if (maxNumSegmentsToMerge >= indexes.size()) {
            dimensionNames = IndexMerger.getMergedDimensionsFromQueryableIndexes(indexesToMerge, dataSchema.getDimensionsSpec());
        }
        final File outDir = new File(baseOutDir, StringUtils.format("merged_%d", suffix++));
        mergedFiles.add(merger.mergeQueryableIndex(indexesToMerge, dataSchema.getGranularitySpec().isRollup(), dataSchema.getAggregators(), null, outDir, tuningConfig.getIndexSpec(), tuningConfig.getIndexSpecForIntermediatePersists(), new BaseProgressIndicator(), tuningConfig.getSegmentWriteOutMediumFactory(), tuningConfig.getMaxColumnsToMerge()));
        indexCleaner.close();
    }
    if (mergedFiles.size() == 1) {
        return Pair.of(mergedFiles.get(0), Preconditions.checkNotNull(dimensionNames, "dimensionNames"));
    } else {
        return mergeSegmentsInSamePartition(dataSchema, tuningConfig, indexIO, merger, mergedFiles, maxNumSegmentsToMerge, baseOutDir, suffix);
    }
}
Also used: Closer(org.apache.druid.java.util.common.io.Closer) QueryableIndex(org.apache.druid.segment.QueryableIndex) ArrayList(java.util.ArrayList) File(java.io.File) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator)
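The recursion is the key trick here: each call merges the inputs in groups of maxNumSegmentsToMerge, then recurses on the merged outputs until a single file remains, so n inputs take roughly ceil(log_k(n)) passes for k = maxNumSegmentsToMerge. A toy, self-contained illustration of that pass count (hypothetical helper, for intuition only, not Druid code):

// Each pass shrinks the file list by a factor of maxNumSegmentsToMerge.
public final class MergePassMath {
    static int passesNeeded(int numFiles, int maxNumSegmentsToMerge) {
        int passes = 0;
        while (numFiles > 1) {
            // ceiling division: number of merged outputs produced by one pass
            numFiles = (numFiles + maxNumSegmentsToMerge - 1) / maxNumSegmentsToMerge;
            passes++;
        }
        return passes;
    }

    public static void main(String[] args) {
        // 100 intermediate files merged 10 at a time: 100 -> 10 -> 1, i.e. 2 passes
        System.out.println(passesNeeded(100, 10));
    }
}

Note also that indexCleaner.close() runs only on the success path of each iteration; wrapping the loop body in try/finally (or putting the Closer in try-with-resources, as Example 45 does) would guarantee that the loaded QueryableIndexes are closed and their files deleted even if mergeQueryableIndex throws.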

Example 43 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class RemoteTaskRunner method stop.

@Override
@LifecycleStop
public void stop() {
    if (!lifecycleLock.canStop()) {
        return;
    }
    try {
        log.info("Stopping RemoteTaskRunner...");
        provisioningService.close();
        Closer closer = Closer.create();
        for (ZkWorker zkWorker : zkWorkers.values()) {
            closer.register(zkWorker);
        }
        closer.register(workerPathCache);
        try {
            closer.close();
        } finally {
            workerStatusPathChildrenCacheExecutor.shutdown();
        }
        if (runPendingTasksExec != null) {
            runPendingTasksExec.shutdown();
        }
        if (cleanupExec != null) {
            cleanupExec.shutdown();
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        lifecycleLock.exitStop();
    }
}
Also used: Closer(org.apache.druid.java.util.common.io.Closer) TimeoutException(java.util.concurrent.TimeoutException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) LifecycleStop(org.apache.druid.java.util.common.lifecycle.LifecycleStop)
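Here the Closer absorbs a dynamic, unbounded set of resources, something try-with-resources alone cannot express: every ZkWorker plus the path cache gets a close() attempt even if an earlier one fails, and only then do the executors shut down. A minimal sketch of the same shutdown shape, assuming a hypothetical Worker type in place of ZkWorker:

import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import org.apache.druid.java.util.common.io.Closer;

public class ShutdownSketch {
    // Stand-in for ZkWorker; any Closeable works.
    interface Worker extends Closeable {}

    static void stopAll(List<Worker> workers, Closeable pathCache) throws IOException {
        final Closer closer = Closer.create();
        for (Worker worker : workers) {
            closer.register(worker);
        }
        closer.register(pathCache);
        // One call attempts to close everything; the first failure is rethrown
        // with any later failures attached as suppressed exceptions.
        closer.close();
    }
}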

Example 44 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class ParallelCombiner method combine.

/**
 * Build a combining tree over the input iterators; the tree combines input entries asynchronously. Each node in the
 * tree is a combining task that iterates through its child iterators, aggregates their inputs, and returns an
 * iterator over the result of that aggregation.
 * <p>
 * This method is called when data has been spilled to disk, and thus a streaming combine is preferred to avoid excessive disk access.
 *
 * @return an iterator of the root grouper of the combining tree
 */
public CloseableIterator<Entry<KeyType>> combine(List<? extends CloseableIterator<Entry<KeyType>>> sortedIterators, List<String> mergedDictionary) {
    // CombineBuffer is initialized when this method is called and closed after the result iterator is done
    final Closer closer = Closer.create();
    try {
        final ByteBuffer combineBuffer = combineBufferHolder.get();
        final int minimumRequiredBufferCapacity = StreamingMergeSortedGrouper.requiredBufferCapacity(combineKeySerdeFactory.factorizeWithDictionary(mergedDictionary), combiningFactories);
        // We want to maximize parallelism while keeping each buffer slice larger than the minimum capacity
        // required by StreamingMergeSortedGrouper. Here, we find the leafCombineDegree of the combining tree and
        // the number of buffers that maximize parallelism.
        final Pair<Integer, Integer> degreeAndNumBuffers = findLeafCombineDegreeAndNumBuffers(combineBuffer, minimumRequiredBufferCapacity, concurrencyHint, sortedIterators.size());
        final int leafCombineDegree = degreeAndNumBuffers.lhs;
        final int numBuffers = degreeAndNumBuffers.rhs;
        final int sliceSize = combineBuffer.capacity() / numBuffers;
        final Supplier<ByteBuffer> bufferSupplier = createCombineBufferSupplier(combineBuffer, numBuffers, sliceSize);
        final Pair<List<CloseableIterator<Entry<KeyType>>>, List<Future>> combineIteratorAndFutures = buildCombineTree(sortedIterators, bufferSupplier, combiningFactories, leafCombineDegree, mergedDictionary);
        final CloseableIterator<Entry<KeyType>> combineIterator = Iterables.getOnlyElement(combineIteratorAndFutures.lhs);
        final List<Future> combineFutures = combineIteratorAndFutures.rhs;
        closer.register(() -> checkCombineFutures(combineFutures));
        return CloseableIterators.wrap(combineIterator, closer);
    } catch (Throwable t) {
        try {
            closer.close();
        } catch (Throwable t2) {
            t.addSuppressed(t2);
        }
        throw t;
    }
}
Also used: Closer(org.apache.druid.java.util.common.io.Closer) ByteBuffer(java.nio.ByteBuffer) Entry(org.apache.druid.query.groupby.epinephelinae.Grouper.Entry) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Future(java.util.concurrent.Future) ArrayList(java.util.ArrayList) List(java.util.List)
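The catch block is the general "clean up on failure without losing the original error" idiom: if building the iterator tree throws, the Closer is closed immediately, and any secondary failure is attached via addSuppressed() rather than masking the first; on success, closing is deferred by handing the Closer to CloseableIterators.wrap(). A self-contained sketch of the same pattern as a reusable helper (the name buildOrClose is hypothetical, not a Druid API):

import java.io.Closeable;
import java.util.concurrent.Callable;

public final class CloseOnFailure {
    // Run builder; if it fails, close cleanup but keep the builder's
    // exception primary, with any close failure suppressed onto it.
    static <T> T buildOrClose(Closeable cleanup, Callable<T> builder) throws Exception {
        try {
            return builder.call();
        } catch (Exception e) {
            try {
                cleanup.close();
            } catch (Exception e2) {
                e.addSuppressed(e2);
            }
            throw e;
        }
    }
}

combine() above inlines this shape with Throwable rather than Exception so that Errors also trigger cleanup.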

Example 45 with Closer

use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.

the class SegmentManager method dropSegment.

public void dropSegment(final DataSegment segment) {
    final String dataSource = segment.getDataSource();
    // compute() is used to ensure that the operation for a data source is executed atomically
    dataSources.compute(dataSource, (dataSourceName, dataSourceState) -> {
        if (dataSourceState == null) {
            log.info("Told to delete a queryable for a dataSource[%s] that doesn't exist.", dataSourceName);
            return null;
        } else {
            final VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals = dataSourceState.getTimeline();
            final ShardSpec shardSpec = segment.getShardSpec();
            // remove() searches for a partitionChunk equal to the given one. Note that
            // partitionChunk.equals() checks only the partitionNum, not the object itself.
            final PartitionChunk<ReferenceCountingSegment> removed = loadedIntervals.remove(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(ReferenceCountingSegment.wrapSegment(null, shardSpec)));
            final ReferenceCountingSegment oldQueryable = (removed == null) ? null : removed.getObject();
            if (oldQueryable != null) {
                try (final Closer closer = Closer.create()) {
                    dataSourceState.removeSegment(segment);
                    closer.register(oldQueryable);
                    log.info("Attempting to close segment %s", segment.getId());
                    final ReferenceCountingIndexedTable oldTable = dataSourceState.tablesLookup.remove(segment.getId());
                    if (oldTable != null) {
                        closer.register(oldTable);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            } else {
                log.info("Told to delete a queryable on dataSource[%s] for interval[%s] and version[%s] that I don't have.", dataSourceName, segment.getInterval(), segment.getVersion());
            }
            // Returning null removes the entry of dataSource from the map
            return dataSourceState.isEmpty() ? null : dataSourceState;
        }
    });
    segmentLoader.cleanup(segment);
}
Also used: ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) Closer(org.apache.druid.java.util.common.io.Closer) ReferenceCountingIndexedTable(org.apache.druid.segment.join.table.ReferenceCountingIndexedTable) IOException(java.io.IOException) ShardSpec(org.apache.druid.timeline.partition.ShardSpec)
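Unlike the earlier examples, this one puts the Closer itself in try-with-resources: Closer implements Closeable, so everything registered inside the block (the old segment, and the indexed table if present) is closed when the block exits, whether normally or by exception. A minimal sketch of that composition, using hypothetical parameter names:

import java.io.Closeable;
import java.io.IOException;
import org.apache.druid.java.util.common.io.Closer;

public class TryWithCloserSketch {
    static void closeTogether(Closeable segment, Closeable tableOrNull) {
        try (Closer closer = Closer.create()) {
            closer.register(segment);
            if (tableOrNull != null) {
                closer.register(tableOrNull); // conditionally registered, still closed on exit
            }
            // ... bookkeeping that may throw goes here; registered resources
            // are closed either way when the try block exits ...
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}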

Aggregations

Closer (org.apache.druid.java.util.common.io.Closer): 58
IOException (java.io.IOException): 21
ArrayList (java.util.ArrayList): 17
File (java.io.File): 12
ISE (org.apache.druid.java.util.common.ISE): 10
List (java.util.List): 9
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig): 8
ByteBuffer (java.nio.ByteBuffer): 7
Nullable (javax.annotation.Nullable): 7
QueryableIndex (org.apache.druid.segment.QueryableIndex): 7
Test (org.junit.Test): 7
GroupByQueryRunnerFactory (org.apache.druid.query.groupby.GroupByQueryRunnerFactory): 6
ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment): 6
ImmutableList (com.google.common.collect.ImmutableList): 5
Map (java.util.Map): 5
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 4
Closeable (java.io.Closeable): 4
ExecutionException (java.util.concurrent.ExecutionException): 4
CloseableIterator (org.apache.druid.java.util.common.parsers.CloseableIterator): 4
BaseProgressIndicator (org.apache.druid.segment.BaseProgressIndicator): 4