Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class OrcReader, method intermediateRowIterator.
@Override
protected CloseableIterator<OrcStruct> intermediateRowIterator() throws IOException {
  final Closer closer = Closer.create();
  // We fetch here to cache a copy locally. However, this might need to be changed if we want to split an orc file
  // into several InputSplits in the future.
  final byte[] buffer = new byte[InputEntity.DEFAULT_FETCH_BUFFER_SIZE];
  final CleanableFile file = closer.register(source.fetch(temporaryDirectory, buffer));
  final Path path = new Path(file.file().toURI());
  final ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader();
  final Reader reader;
  try {
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    reader = closer.register(OrcFile.createReader(path, OrcFile.readerOptions(conf)));
  } finally {
    Thread.currentThread().setContextClassLoader(currentClassLoader);
  }
  // The line below gets the schema to read all columns.
  // This can be improved in the future by projecting only the columns that users want.
  final TypeDescription schema = reader.getSchema();
  final RecordReader batchReader = reader.rows(reader.options());
  final OrcMapredRecordReader<OrcStruct> recordReader = new OrcMapredRecordReader<>(batchReader, schema);
  closer.register(recordReader::close);

  return new CloseableIterator<OrcStruct>() {
    final NullWritable key = recordReader.createKey();

    OrcStruct value = null;

    @Override
    public boolean hasNext() {
      if (value == null) {
        try {
          // The returned OrcStruct in next() can be kept in memory for a while.
          // Here, we create a new instance of OrcStruct before calling RecordReader.next(),
          // so that we avoid sharing the same reference to the "value" across rows.
          value = recordReader.createValue();
          if (!recordReader.next(key, value)) {
            value = null;
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
      return value != null;
    }

    @Override
    public OrcStruct next() {
      if (value == null) {
        throw new NoSuchElementException();
      }
      final OrcStruct currentValue = value;
      value = null;
      return currentValue;
    }

    @Override
    public void close() throws IOException {
      closer.close();
    }
  };
}
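The method above ties the lifetime of every resource opened while building the iterator (the fetched file, the ORC Reader, the record reader) to a single Closer, and the returned iterator's close() simply delegates to it. A minimal, self-contained sketch of that pattern, using only the Closer calls shown above (create, register, close); the class and method names here are illustrative, not Druid APIs:

import java.io.Closeable;
import java.io.IOException;
import org.apache.druid.java.util.common.io.Closer;

// Illustrative holder, not a Druid class: it accumulates resources on a
// Closer as they are opened and releases them all from a single close().
public class CloserBackedResources implements Closeable {
  private final Closer closer = Closer.create();

  // Register each resource as soon as it is opened so that it can still be
  // released even if a later setup step fails.
  public <C extends Closeable> C open(C resource) {
    return closer.register(resource);
  }

  @Override
  public void close() throws IOException {
    closer.close();
  }
}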
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class PartialSegmentMergeTask, method mergeSegmentsInSamePartition.
private static Pair<File, List<String>> mergeSegmentsInSamePartition(
    DataSchema dataSchema,
    ParallelIndexTuningConfig tuningConfig,
    IndexIO indexIO,
    IndexMergerV9 merger,
    List<File> indexes,
    int maxNumSegmentsToMerge,
    File baseOutDir,
    int outDirSuffix
) throws IOException {
  int suffix = outDirSuffix;
  final List<File> mergedFiles = new ArrayList<>();
  List<String> dimensionNames = null;
  for (int i = 0; i < indexes.size(); i += maxNumSegmentsToMerge) {
    final List<File> filesToMerge = indexes.subList(i, Math.min(i + maxNumSegmentsToMerge, indexes.size()));
    final List<QueryableIndex> indexesToMerge = new ArrayList<>(filesToMerge.size());
    final Closer indexCleaner = Closer.create();
    for (File file : filesToMerge) {
      final QueryableIndex queryableIndex = indexIO.loadIndex(file);
      indexesToMerge.add(queryableIndex);
      indexCleaner.register(() -> {
        queryableIndex.close();
        file.delete();
      });
    }
    if (maxNumSegmentsToMerge >= indexes.size()) {
      dimensionNames = IndexMerger.getMergedDimensionsFromQueryableIndexes(indexesToMerge, dataSchema.getDimensionsSpec());
    }
    final File outDir = new File(baseOutDir, StringUtils.format("merged_%d", suffix++));
    mergedFiles.add(
        merger.mergeQueryableIndex(
            indexesToMerge,
            dataSchema.getGranularitySpec().isRollup(),
            dataSchema.getAggregators(),
            null,
            outDir,
            tuningConfig.getIndexSpec(),
            tuningConfig.getIndexSpecForIntermediatePersists(),
            new BaseProgressIndicator(),
            tuningConfig.getSegmentWriteOutMediumFactory(),
            tuningConfig.getMaxColumnsToMerge()
        )
    );
    indexCleaner.close();
  }
  if (mergedFiles.size() == 1) {
    return Pair.of(mergedFiles.get(0), Preconditions.checkNotNull(dimensionNames, "dimensionNames"));
  } else {
    return mergeSegmentsInSamePartition(dataSchema, tuningConfig, indexIO, merger, mergedFiles, maxNumSegmentsToMerge, baseOutDir, suffix);
  }
}
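The per-batch cleanup above registers one lambda per loaded index that both closes the index and deletes its backing file, then closes the Closer once the batch has been merged. A reduced sketch of that idea, with FileInputStream standing in for the loaded QueryableIndex (the helper and its names are illustrative, not Druid code):

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;
import org.apache.druid.java.util.common.io.Closer;

class BatchCleanupSketch {
  static void processBatch(List<File> filesToMerge) throws IOException {
    final Closer cleaner = Closer.create();
    try {
      for (File file : filesToMerge) {
        final FileInputStream in = new FileInputStream(file);
        // One registered lambda both releases the resource and removes its file.
        cleaner.register(() -> {
          in.close();
          file.delete();
        });
        // ... the merge work would consume "in" here ...
      }
    } finally {
      // Close every stream and delete its file once the batch is done.
      cleaner.close();
    }
  }
}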
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class RemoteTaskRunner, method stop.
@Override
@LifecycleStop
public void stop() {
  if (!lifecycleLock.canStop()) {
    return;
  }
  try {
    log.info("Stopping RemoteTaskRunner...");
    provisioningService.close();
    Closer closer = Closer.create();
    for (ZkWorker zkWorker : zkWorkers.values()) {
      closer.register(zkWorker);
    }
    closer.register(workerPathCache);
    try {
      closer.close();
    } finally {
      workerStatusPathChildrenCacheExecutor.shutdown();
    }
    if (runPendingTasksExec != null) {
      runPendingTasksExec.shutdown();
    }
    if (cleanupExec != null) {
      cleanupExec.shutdown();
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  } finally {
    lifecycleLock.exitStop();
  }
}
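The shutdown path above collects every ZkWorker plus the worker path cache on one Closer, so a single close() call tears them all down before the executors are shut down. A minimal sketch of that shape, with generic Closeables and a Runnable standing in for the executor shutdown (illustrative helper, not Druid code):

import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import org.apache.druid.java.util.common.io.Closer;

final class ShutdownSketch {
  static void stopAll(List<? extends Closeable> workers, Closeable pathCache, Runnable shutdownExecutor) throws IOException {
    final Closer closer = Closer.create();
    for (Closeable worker : workers) {
      closer.register(worker);
    }
    closer.register(pathCache);
    try {
      // Close all workers and the cache together.
      closer.close();
    } finally {
      // Shut the executor down even if closing failed, mirroring the
      // finally block in RemoteTaskRunner.stop() above.
      shutdownExecutor.run();
    }
  }
}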
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class ParallelCombiner, method combine.
/**
 * Build a combining tree for the input iterators which combines input entries asynchronously. Each node in the tree
 * is a combining task which iterates through child iterators, aggregates the inputs from those iterators, and returns
 * an iterator for the result of aggregation.
 * <p>
 * This method is called when data is spilled and thus streaming combine is preferred to avoid too many disk accesses.
 *
 * @return an iterator of the root grouper of the combining tree
 */
public CloseableIterator<Entry<KeyType>> combine(
    List<? extends CloseableIterator<Entry<KeyType>>> sortedIterators,
    List<String> mergedDictionary
) {
  // CombineBuffer is initialized when this method is called and closed after the result iterator is done
  final Closer closer = Closer.create();
  try {
    final ByteBuffer combineBuffer = combineBufferHolder.get();
    final int minimumRequiredBufferCapacity = StreamingMergeSortedGrouper.requiredBufferCapacity(
        combineKeySerdeFactory.factorizeWithDictionary(mergedDictionary),
        combiningFactories
    );
    // We want to maximize the parallelism while the size of buffer slice is greater than the minimum buffer size
    // required by StreamingMergeSortedGrouper. Here, we find the leafCombineDegree of the combining tree and the
    // required number of buffers maximizing the parallelism.
    final Pair<Integer, Integer> degreeAndNumBuffers = findLeafCombineDegreeAndNumBuffers(
        combineBuffer,
        minimumRequiredBufferCapacity,
        concurrencyHint,
        sortedIterators.size()
    );
    final int leafCombineDegree = degreeAndNumBuffers.lhs;
    final int numBuffers = degreeAndNumBuffers.rhs;
    final int sliceSize = combineBuffer.capacity() / numBuffers;
    final Supplier<ByteBuffer> bufferSupplier = createCombineBufferSupplier(combineBuffer, numBuffers, sliceSize);
    final Pair<List<CloseableIterator<Entry<KeyType>>>, List<Future>> combineIteratorAndFutures = buildCombineTree(
        sortedIterators,
        bufferSupplier,
        combiningFactories,
        leafCombineDegree,
        mergedDictionary
    );
    final CloseableIterator<Entry<KeyType>> combineIterator = Iterables.getOnlyElement(combineIteratorAndFutures.lhs);
    final List<Future> combineFutures = combineIteratorAndFutures.rhs;
    closer.register(() -> checkCombineFutures(combineFutures));
    return CloseableIterators.wrap(combineIterator, closer);
  } catch (Throwable t) {
    try {
      closer.close();
    } catch (Throwable t2) {
      t.addSuppressed(t2);
    }
    throw t;
  }
}
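Two Closer details carry the weight here: a verification callback (checkCombineFutures) is registered like any other resource so it runs when the result iterator is closed, and on any failure the Closer itself is closed immediately, with a secondary failure attached as a suppressed exception. A skeleton of that error-handling shape, where buildResult() is a hypothetical stand-in for the tree-building work:

import java.io.Closeable;
import org.apache.druid.java.util.common.io.Closer;

final class CombineErrorHandlingSketch {
  static Closeable buildWithCleanupOnFailure() {
    final Closer closer = Closer.create();
    try {
      // Anything registered here (resources, verification callbacks) is
      // released when the caller closes the returned Closer.
      closer.register(buildResult());
      return closer;
    } catch (Throwable t) {
      // On failure, release what was already registered and keep the
      // original failure as the primary exception, as in combine() above.
      try {
        closer.close();
      } catch (Throwable t2) {
        t.addSuppressed(t2);
      }
      throw t;
    }
  }

  // Hypothetical stand-in for building the combining tree.
  private static Closeable buildResult() {
    return () -> { /* release buffers, cancel futures, etc. */ };
  }
}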
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class SegmentManager, method dropSegment.
public void dropSegment(final DataSegment segment) {
  final String dataSource = segment.getDataSource();
  // compute() is used to ensure that the operation for a data source is executed atomically
  dataSources.compute(dataSource, (dataSourceName, dataSourceState) -> {
    if (dataSourceState == null) {
      log.info("Told to delete a queryable for a dataSource[%s] that doesn't exist.", dataSourceName);
      return null;
    } else {
      final VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals = dataSourceState.getTimeline();
      final ShardSpec shardSpec = segment.getShardSpec();
      // remove() searches for a partitionChunk that is equal to the given one.
      // Note that partitionChunk.equals() checks only the partitionNum, but not the object.
      final PartitionChunk<ReferenceCountingSegment> removed = loadedIntervals.remove(
          segment.getInterval(),
          segment.getVersion(),
          segment.getShardSpec().createChunk(ReferenceCountingSegment.wrapSegment(null, shardSpec))
      );
      final ReferenceCountingSegment oldQueryable = (removed == null) ? null : removed.getObject();
      if (oldQueryable != null) {
        try (final Closer closer = Closer.create()) {
          dataSourceState.removeSegment(segment);
          closer.register(oldQueryable);
          log.info("Attempting to close segment %s", segment.getId());
          final ReferenceCountingIndexedTable oldTable = dataSourceState.tablesLookup.remove(segment.getId());
          if (oldTable != null) {
            closer.register(oldTable);
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      } else {
        log.info("Told to delete a queryable on dataSource[%s] for interval[%s] and version[%s] that I don't have.", dataSourceName, segment.getInterval(), segment.getVersion());
      }
      // Returning null removes the entry of dataSource from the map
      return dataSourceState.isEmpty() ? null : dataSourceState;
    }
  });
  segmentLoader.cleanup(segment);
}
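Because Closer itself is Closeable, the segment and the (possibly absent) indexed table can be registered inside a try-with-resources block and are released when the block exits. A minimal sketch of that usage with plain Closeables (illustrative, not Druid code):

import java.io.Closeable;
import java.io.IOException;
import org.apache.druid.java.util.common.io.Closer;

final class TryWithResourcesSketch {
  static void release(Closeable segment, Closeable tableOrNull) {
    // The Closer is the try-with-resources resource; everything registered
    // on it is closed when the block exits.
    try (final Closer closer = Closer.create()) {
      closer.register(segment);
      if (tableOrNull != null) {
        closer.register(tableOrNull);
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}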