
Example 1 with CleanableFile

Use of org.apache.druid.data.input.InputEntity.CleanableFile in project druid by druid-io.

From the class ParquetReader, the method intermediateRowIterator:

@Override
protected CloseableIterator<Group> intermediateRowIterator() throws IOException {
    final Closer closer = Closer.create();
    byte[] buffer = new byte[InputEntity.DEFAULT_FETCH_BUFFER_SIZE];
    final ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader();
    final org.apache.parquet.hadoop.ParquetReader<Group> reader;
    try {
        final CleanableFile file = closer.register(source.fetch(temporaryDirectory, buffer));
        final Path path = new Path(file.file().toURI());
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        reader = closer.register(org.apache.parquet.hadoop.ParquetReader.builder(new GroupReadSupport(), path).withConf(conf).build());
    } catch (Exception e) {
        // We don't expect any exceptions to be thrown in the try clause above,
        // but we catch them just in case to avoid any potential resource leak.
        closer.close();
        throw new RuntimeException(e);
    } finally {
        Thread.currentThread().setContextClassLoader(currentClassLoader);
    }
    return new CloseableIterator<Group>() {

        Group value = null;

        @Override
        public boolean hasNext() {
            if (value == null) {
                try {
                    value = reader.read();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return value != null;
        }

        @Override
        public Group next() {
            if (value == null) {
                throw new NoSuchElementException();
            }
            Group currentValue = value;
            value = null;
            return currentValue;
        }

        @Override
        public void close() throws IOException {
            closer.close();
        }
    };
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) Path(org.apache.hadoop.fs.Path) Group(org.apache.parquet.example.data.Group) GroupReadSupport(org.apache.parquet.hadoop.example.GroupReadSupport) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) IOException(java.io.IOException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) NoSuchElementException(java.util.NoSuchElementException) CleanableFile(org.apache.druid.data.input.InputEntity.CleanableFile)
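
For reference, CleanableFile is a small interface: it extends Closeable and exposes the fetched local copy via file(). A minimal sketch of an implementation that deletes its temporary copy on close is shown below; the class name and the deletion policy are illustrative, not Druid's actual internals.

import java.io.File;
import java.io.IOException;
import org.apache.druid.data.input.InputEntity.CleanableFile;

// Illustrative only: a CleanableFile that removes its temporary copy on close.
class TempCleanableFile implements CleanableFile {

    private final File tempFile;

    TempCleanableFile(File tempFile) {
        this.tempFile = tempFile;
    }

    @Override
    public File file() {
        return tempFile;
    }

    @Override
    public void close() throws IOException {
        // Best-effort cleanup of the local copy produced by InputEntity.fetch().
        if (tempFile.exists() && !tempFile.delete()) {
            throw new IOException("Failed to delete " + tempFile);
        }
    }
}

Registering such a file with a Closer, as ParquetReader does above, guarantees the temporary copy is released when the returned iterator is closed.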

Example 2 with CleanableFile

Use of org.apache.druid.data.input.InputEntity.CleanableFile in project druid by druid-io.

From the class DruidSegmentReaderTest, the test method testMakeCloseableIteratorFromSequenceAndSegmentFileCloseYielderOnClose:

@Test
public void testMakeCloseableIteratorFromSequenceAndSegmentFileCloseYielderOnClose() throws IOException {
    MutableBoolean isSequenceClosed = new MutableBoolean(false);
    MutableBoolean isFileClosed = new MutableBoolean(false);
    Sequence<Map<String, Object>> sequence = new BaseSequence<>(new IteratorMaker<Map<String, Object>, Iterator<Map<String, Object>>>() {

        @Override
        public Iterator<Map<String, Object>> make() {
            return Collections.emptyIterator();
        }

        @Override
        public void cleanup(Iterator<Map<String, Object>> iterFromMake) {
            isSequenceClosed.setValue(true);
        }
    });
    CleanableFile cleanableFile = new CleanableFile() {

        @Override
        public File file() {
            return null;
        }

        @Override
        public void close() {
            isFileClosed.setValue(true);
        }
    };
    try (CloseableIterator<Map<String, Object>> iterator = DruidSegmentReader.makeCloseableIteratorFromSequenceAndSegmentFile(sequence, cleanableFile)) {
        while (iterator.hasNext()) {
            iterator.next();
        }
    }
    Assert.assertTrue("File is not closed", isFileClosed.booleanValue());
    Assert.assertTrue("Sequence is not closed", isSequenceClosed.booleanValue());
}
Also used : MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) Iterator(java.util.Iterator) CleanableFile(org.apache.druid.data.input.InputEntity.CleanableFile) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) BaseSequence(org.apache.druid.java.util.common.guava.BaseSequence) NullHandlingTest(org.apache.druid.common.config.NullHandlingTest) Test(org.junit.Test)
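
The helper under test must close both the sequence's underlying yielder and the segment file when the iterator is closed. A generic sketch of that pattern follows; it illustrates the contract the test verifies, not Druid's actual makeCloseableIteratorFromSequenceAndSegmentFile implementation.

import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;

// Illustrative only: an iterator that closes several resources together.
class ClosingIterator<T> implements Iterator<T>, Closeable {

    private final Iterator<T> delegate;
    private final Closeable[] resources;

    ClosingIterator(Iterator<T> delegate, Closeable... resources) {
        this.delegate = delegate;
        this.resources = resources;
    }

    @Override
    public boolean hasNext() {
        return delegate.hasNext();
    }

    @Override
    public T next() {
        return delegate.next();
    }

    @Override
    public void close() throws IOException {
        // Close every resource, suppressing secondary failures so that
        // one failed close() does not leak the remaining resources.
        IOException first = null;
        for (Closeable resource : resources) {
            try {
                resource.close();
            } catch (IOException e) {
                if (first == null) {
                    first = e;
                } else {
                    first.addSuppressed(e);
                }
            }
        }
        if (first != null) {
            throw first;
        }
    }
}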

Example 3 with CleanableFile

Use of org.apache.druid.data.input.InputEntity.CleanableFile in project druid by druid-io.

From the class OrcReader, the method intermediateRowIterator:

@Override
protected CloseableIterator<OrcStruct> intermediateRowIterator() throws IOException {
    final Closer closer = Closer.create();
    // We fetch here to cache a copy locally. However, this might need to be changed if we want to split
    // an ORC file into several InputSplits in the future.
    final byte[] buffer = new byte[InputEntity.DEFAULT_FETCH_BUFFER_SIZE];
    final CleanableFile file = closer.register(source.fetch(temporaryDirectory, buffer));
    final Path path = new Path(file.file().toURI());
    final ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader();
    final Reader reader;
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        reader = closer.register(OrcFile.createReader(path, OrcFile.readerOptions(conf)));
    } finally {
        Thread.currentThread().setContextClassLoader(currentClassLoader);
    }
    // The line below gets the schema to read all columns.
    // This could be improved in the future by projecting only the columns that users need.
    final TypeDescription schema = reader.getSchema();
    final RecordReader batchReader = reader.rows(reader.options());
    final OrcMapredRecordReader<OrcStruct> recordReader = new OrcMapredRecordReader<>(batchReader, schema);
    closer.register(recordReader::close);
    return new CloseableIterator<OrcStruct>() {

        final NullWritable key = recordReader.createKey();

        OrcStruct value = null;

        @Override
        public boolean hasNext() {
            if (value == null) {
                try {
                    // The OrcStruct returned from next() can be kept in memory for a while.
                    // Here, we create a new instance of OrcStruct before calling RecordReader.next(),
                    // so that we avoid sharing the same reference to "value" across rows.
                    value = recordReader.createValue();
                    if (!recordReader.next(key, value)) {
                        value = null;
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return value != null;
        }

        @Override
        public OrcStruct next() {
            if (value == null) {
                throw new NoSuchElementException();
            }
            final OrcStruct currentValue = value;
            value = null;
            return currentValue;
        }

        @Override
        public void close() throws IOException {
            closer.close();
        }
    };
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) Path(org.apache.hadoop.fs.Path) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) RecordReader(org.apache.orc.RecordReader) OrcMapredRecordReader(org.apache.orc.mapred.OrcMapredRecordReader) Reader(org.apache.orc.Reader) IntermediateRowParsingReader(org.apache.druid.data.input.IntermediateRowParsingReader) IOException(java.io.IOException) NullWritable(org.apache.hadoop.io.NullWritable) OrcStruct(org.apache.orc.mapred.OrcStruct) TypeDescription(org.apache.orc.TypeDescription) CleanableFile(org.apache.druid.data.input.InputEntity.CleanableFile) NoSuchElementException(java.util.NoSuchElementException)
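
Examples 1 and 3 implement the same read-ahead idiom: hasNext() lazily pulls the next record into a field, and next() hands it off and clears the field. A generic sketch of that idiom, with illustrative names, is below; Druid's anonymous CloseableIterator classes above inline this logic rather than factoring it out.

import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

// Illustrative only: subclasses fetch one record at a time via readNext(),
// returning null once the input is exhausted.
abstract class ReadAheadIterator<T> implements Iterator<T>, Closeable {

    private T value = null;

    /** Fetches the next record, or returns null at end of input. */
    protected abstract T readNext() throws IOException;

    @Override
    public boolean hasNext() {
        if (value == null) {
            try {
                value = readNext();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        return value != null;
    }

    @Override
    public T next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        final T current = value;
        value = null;
        return current;
    }
}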

Example 4 with CleanableFile

Use of org.apache.druid.data.input.InputEntity.CleanableFile in project druid by druid-io.

From the class DruidSegmentReader, the method intermediateRowIterator:

@Override
protected CloseableIterator<Map<String, Object>> intermediateRowIterator() throws IOException {
    final CleanableFile segmentFile = source.fetch(temporaryDirectory, null);
    final WindowedStorageAdapter storageAdapter = new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile.file())), source.getIntervalFilter());
    final Sequence<Cursor> cursors = storageAdapter.getAdapter().makeCursors(Filters.toFilter(dimFilter), storageAdapter.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    // Retain order of columns from the original segments. Useful for preserving dimension order if we're in
    // schemaless mode.
    final Set<String> columnsToRead = Sets.newLinkedHashSet(Iterables.filter(storageAdapter.getAdapter().getRowSignature().getColumnNames(), columnsFilter::apply));
    final Sequence<Map<String, Object>> sequence = Sequences.concat(Sequences.map(cursors, cursor -> cursorToSequence(cursor, columnsToRead)));
    return makeCloseableIteratorFromSequenceAndSegmentFile(sequence, segmentFile);
}
Also used : TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) IndexedInts(org.apache.druid.segment.data.IndexedInts) ColumnProcessors(org.apache.druid.segment.ColumnProcessors) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) Map(java.util.Map) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) Sequence(org.apache.druid.java.util.common.guava.Sequence) ColumnsFilter(org.apache.druid.data.input.ColumnsFilter) Set(java.util.Set) Sets(com.google.common.collect.Sets) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) IntermediateRowParsingReader(org.apache.druid.data.input.IntermediateRowParsingReader) DimFilter(org.apache.druid.query.filter.DimFilter) Entry(java.util.Map.Entry) BaseDoubleColumnValueSelector(org.apache.druid.segment.BaseDoubleColumnValueSelector) Iterables(com.google.common.collect.Iterables) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Supplier(com.google.common.base.Supplier) CollectionUtils(org.apache.druid.utils.CollectionUtils) InputRowSchema(org.apache.druid.data.input.InputRowSchema) ArrayList(java.util.ArrayList) Yielders(org.apache.druid.java.util.common.guava.Yielders) CleanableFile(org.apache.druid.data.input.InputEntity.CleanableFile) DimensionSelector(org.apache.druid.segment.DimensionSelector) Yielder(org.apache.druid.java.util.common.guava.Yielder) NoSuchElementException(java.util.NoSuchElementException) Sequences(org.apache.druid.java.util.common.guava.Sequences) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) Iterator(java.util.Iterator) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) IOException(java.io.IOException) ColumnProcessorFactory(org.apache.druid.segment.ColumnProcessorFactory) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) BaseLongColumnValueSelector(org.apache.druid.segment.BaseLongColumnValueSelector) Cursor(org.apache.druid.segment.Cursor) ColumnType(org.apache.druid.segment.column.ColumnType) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) InputEntity(org.apache.druid.data.input.InputEntity) IndexIO(org.apache.druid.segment.IndexIO) Filters(org.apache.druid.segment.filter.Filters) CloseableUtils(org.apache.druid.utils.CloseableUtils) Collections(java.util.Collections)
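
The Sets.newLinkedHashSet call above is what preserves the segment's original column order after filtering, which the comment notes matters in schemaless mode. A tiny self-contained illustration of that property, with made-up column names:

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class ColumnOrderDemo {

    public static void main(String[] args) {
        // Filtering through a LinkedHashSet keeps the segment's column order;
        // a plain HashSet would reorder the columns by hash code.
        List<String> signature = Arrays.asList("__time", "page", "user", "added");
        Set<String> allowed = Set.of("user", "page");
        Set<String> columnsToRead = new LinkedHashSet<>();
        for (String column : signature) {
            if (allowed.contains(column)) {
                columnsToRead.add(column);
            }
        }
        System.out.println(columnsToRead); // prints [page, user]
    }
}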

Aggregations

CleanableFile (org.apache.druid.data.input.InputEntity.CleanableFile) 4
CloseableIterator (org.apache.druid.java.util.common.parsers.CloseableIterator) 4
IOException (java.io.IOException) 3
NoSuchElementException (java.util.NoSuchElementException) 3
Iterator (java.util.Iterator) 2
Map (java.util.Map) 2
IntermediateRowParsingReader (org.apache.druid.data.input.IntermediateRowParsingReader) 2
Closer (org.apache.druid.java.util.common.io.Closer) 2
ParseException (org.apache.druid.java.util.common.parsers.ParseException) 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 1
Preconditions (com.google.common.base.Preconditions) 1
Supplier (com.google.common.base.Supplier) 1
ImmutableMap (com.google.common.collect.ImmutableMap) 1
Iterables (com.google.common.collect.Iterables) 1
Sets (com.google.common.collect.Sets) 1
File (java.io.File) 1
ArrayList (java.util.ArrayList) 1
Collections (java.util.Collections) 1
List (java.util.List) 1
Entry (java.util.Map.Entry) 1