Use of org.apache.druid.data.input.InputEntity.CleanableFile in project druid by druid-io.
The class ParquetReader, method intermediateRowIterator.
@Override
protected CloseableIterator<Group> intermediateRowIterator() throws IOException
{
  final Closer closer = Closer.create();
  byte[] buffer = new byte[InputEntity.DEFAULT_FETCH_BUFFER_SIZE];
  final ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader();
  final org.apache.parquet.hadoop.ParquetReader<Group> reader;
  try {
    final CleanableFile file = closer.register(source.fetch(temporaryDirectory, buffer));
    final Path path = new Path(file.file().toURI());

    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    reader = closer.register(
        org.apache.parquet.hadoop.ParquetReader.builder(new GroupReadSupport(), path)
                                               .withConf(conf)
                                               .build()
    );
  }
  catch (Exception e) {
    // We don't expect any exceptions to be thrown in the try clause above,
    // but we catch them just in case to avoid a potential resource leak.
    closer.close();
    throw new RuntimeException(e);
  }
  finally {
    Thread.currentThread().setContextClassLoader(currentClassLoader);
  }

  return new CloseableIterator<Group>()
  {
    Group value = null;

    @Override
    public boolean hasNext()
    {
      if (value == null) {
        try {
          value = reader.read();
        }
        catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
      return value != null;
    }

    @Override
    public Group next()
    {
      if (value == null) {
        throw new NoSuchElementException();
      }
      Group currentValue = value;
      value = null;
      return currentValue;
    }

    @Override
    public void close() throws IOException
    {
      closer.close();
    }
  };
}
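For context, InputEntity.CleanableFile couples a fetched local File with Closeable cleanup, which is why the reader registers it with the Closer: closing the returned iterator releases the Parquet reader and the temporary copy in one pass. The following standalone sketch mirrors that contract without any Druid dependencies; LocalCleanableFile and fetchToTempCopy are hypothetical names used only for illustration.

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

// Hypothetical stand-in for InputEntity.CleanableFile: a local copy plus its cleanup.
interface LocalCleanableFile extends Closeable
{
  File file();
}

final class FetchSketch
{
  // Copies the source into the temporary directory and returns a handle whose
  // close() deletes the copy, mirroring what the fetch-and-register pattern above relies on.
  static LocalCleanableFile fetchToTempCopy(Path source, Path temporaryDirectory) throws IOException
  {
    final Path copy = Files.createTempFile(temporaryDirectory, "fetch-", ".tmp");
    Files.copy(source, copy, StandardCopyOption.REPLACE_EXISTING);
    return new LocalCleanableFile()
    {
      @Override
      public File file()
      {
        return copy.toFile();
      }

      @Override
      public void close() throws IOException
      {
        Files.deleteIfExists(copy);
      }
    };
  }
}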
Use of org.apache.druid.data.input.InputEntity.CleanableFile in project druid by druid-io.
The class DruidSegmentReaderTest, method testMakeCloseableIteratorFromSequenceAndSegmentFileCloseYielderOnClose.
@Test
public void testMakeCloseableIteratorFromSequenceAndSegmentFileCloseYielderOnClose() throws IOException
{
  MutableBoolean isSequenceClosed = new MutableBoolean(false);
  MutableBoolean isFileClosed = new MutableBoolean(false);
  Sequence<Map<String, Object>> sequence = new BaseSequence<>(
      new IteratorMaker<Map<String, Object>, Iterator<Map<String, Object>>>()
      {
        @Override
        public Iterator<Map<String, Object>> make()
        {
          return Collections.emptyIterator();
        }

        @Override
        public void cleanup(Iterator<Map<String, Object>> iterFromMake)
        {
          isSequenceClosed.setValue(true);
        }
      }
  );
  CleanableFile cleanableFile = new CleanableFile()
  {
    @Override
    public File file()
    {
      return null;
    }

    @Override
    public void close()
    {
      isFileClosed.setValue(true);
    }
  };
  try (CloseableIterator<Map<String, Object>> iterator =
           DruidSegmentReader.makeCloseableIteratorFromSequenceAndSegmentFile(sequence, cleanableFile)) {
    while (iterator.hasNext()) {
      iterator.next();
    }
  }
  Assert.assertTrue("File is not closed", isFileClosed.booleanValue());
  Assert.assertTrue("Sequence is not closed", isSequenceClosed.booleanValue());
}
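The test above only checks the contract: closing the iterator must release both the underlying Sequence and the segment's CleanableFile. A minimal sketch of that close-both behavior, using a plain Iterator plus arbitrary Closeable resources rather than Druid's actual Sequence/Yielder machinery, could look like this (ClosingIterator is a hypothetical name, not a Druid class):

import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;

// Minimal sketch: an iterator that closes several resources together when it is closed.
// This is not Druid's implementation; it only illustrates the behavior the test verifies.
final class ClosingIterator<T> implements Iterator<T>, Closeable
{
  private final Iterator<T> delegate;
  private final Closeable[] resources;

  ClosingIterator(Iterator<T> delegate, Closeable... resources)
  {
    this.delegate = delegate;
    this.resources = resources;
  }

  @Override
  public boolean hasNext()
  {
    return delegate.hasNext();
  }

  @Override
  public T next()
  {
    return delegate.next();
  }

  @Override
  public void close() throws IOException
  {
    IOException failure = null;
    // Close everything, even if an earlier close() throws.
    for (Closeable resource : resources) {
      try {
        resource.close();
      }
      catch (IOException e) {
        if (failure == null) {
          failure = e;
        } else {
          failure.addSuppressed(e);
        }
      }
    }
    if (failure != null) {
      throw failure;
    }
  }
}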
Use of org.apache.druid.data.input.InputEntity.CleanableFile in project druid by druid-io.
The class OrcReader, method intermediateRowIterator.
@Override
protected CloseableIterator<OrcStruct> intermediateRowIterator() throws IOException
{
  final Closer closer = Closer.create();

  // We fetch here to cache a copy locally. However, this might need to be changed
  // if we want to split an ORC file into several InputSplits in the future.
  final byte[] buffer = new byte[InputEntity.DEFAULT_FETCH_BUFFER_SIZE];
  final CleanableFile file = closer.register(source.fetch(temporaryDirectory, buffer));
  final Path path = new Path(file.file().toURI());

  final ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader();
  final Reader reader;
  try {
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    reader = closer.register(OrcFile.createReader(path, OrcFile.readerOptions(conf)));
  }
  finally {
    Thread.currentThread().setContextClassLoader(currentClassLoader);
  }

  // The line below reads the schema for all columns. This could be improved in the
  // future by projecting only the columns that users actually want.
  final TypeDescription schema = reader.getSchema();
  final RecordReader batchReader = reader.rows(reader.options());
  final OrcMapredRecordReader<OrcStruct> recordReader = new OrcMapredRecordReader<>(batchReader, schema);
  closer.register(recordReader::close);

  return new CloseableIterator<OrcStruct>()
  {
    final NullWritable key = recordReader.createKey();
    OrcStruct value = null;

    @Override
    public boolean hasNext()
    {
      if (value == null) {
        try {
          // The OrcStruct returned by next() can be kept in memory for a while.
          // Here, we create a new OrcStruct instance before calling RecordReader.next(),
          // so that the same "value" reference is not shared across rows.
          value = recordReader.createValue();
          if (!recordReader.next(key, value)) {
            value = null;
          }
        }
        catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
      return value != null;
    }

    @Override
    public OrcStruct next()
    {
      if (value == null) {
        throw new NoSuchElementException();
      }
      final OrcStruct currentValue = value;
      value = null;
      return currentValue;
    }

    @Override
    public void close() throws IOException
    {
      closer.close();
    }
  };
}
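Both the Parquet and ORC readers temporarily swap the thread's context ClassLoader before building their Hadoop-based readers and restore it in a finally block, so that anything resolved through the context ClassLoader is loaded from the extension's ClassLoader rather than the caller's. A small generic helper for that swap-and-restore pattern might look like the sketch below; withContextClassLoader is a hypothetical name, not a Druid API.

import java.util.concurrent.Callable;

final class ClassLoaderSwapSketch
{
  // Runs the given action with the supplied ClassLoader installed as the thread's
  // context ClassLoader, restoring the previous one afterwards (even on failure).
  static <T> T withContextClassLoader(ClassLoader loader, Callable<T> action) throws Exception
  {
    final Thread thread = Thread.currentThread();
    final ClassLoader previous = thread.getContextClassLoader();
    thread.setContextClassLoader(loader);
    try {
      return action.call();
    }
    finally {
      thread.setContextClassLoader(previous);
    }
  }
}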
Use of org.apache.druid.data.input.InputEntity.CleanableFile in project druid by druid-io.
The class DruidSegmentReader, method intermediateRowIterator.
@Override
protected CloseableIterator<Map<String, Object>> intermediateRowIterator() throws IOException
{
  final CleanableFile segmentFile = source.fetch(temporaryDirectory, null);
  final WindowedStorageAdapter storageAdapter = new WindowedStorageAdapter(
      new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile.file())),
      source.getIntervalFilter()
  );
  final Sequence<Cursor> cursors = storageAdapter.getAdapter().makeCursors(
      Filters.toFilter(dimFilter),
      storageAdapter.getInterval(),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );

  // Retain order of columns from the original segments. Useful for preserving
  // dimension order if we're in schemaless mode.
  final Set<String> columnsToRead = Sets.newLinkedHashSet(
      Iterables.filter(
          storageAdapter.getAdapter().getRowSignature().getColumnNames(),
          columnsFilter::apply
      )
  );
  final Sequence<Map<String, Object>> sequence = Sequences.concat(
      Sequences.map(cursors, cursor -> cursorToSequence(cursor, columnsToRead))
  );

  return makeCloseableIteratorFromSequenceAndSegmentFile(sequence, segmentFile);
}
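On the consumer side, the CloseableIterators returned by these intermediateRowIterator() implementations are meant to be drained and then closed, so the registered Closer (and with it the CleanableFile) is released; closing early is also safe, which is exactly what the test above exercises. A minimal usage sketch follows, assuming it runs in code with access to the protected method (for example inside the reader itself or a same-package test):

// Usage sketch: drain the iterator inside try-with-resources so that close()
// releases the underlying reader/sequence and deletes any fetched temporary file.
try (CloseableIterator<Map<String, Object>> rows = intermediateRowIterator()) {
  while (rows.hasNext()) {
    final Map<String, Object> row = rows.next();
    // process the intermediate row here
  }
}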