Search in sources :

Example 1 with ColumnReaders

Use of io.prestosql.orc.reader.ColumnReaders in the project hetu-core by openlookeng.

From the class OrcRecordReader, the method createColumnReaders.

/**
 * Builds one {@link ColumnReader} per entry of {@code columns}, pairing each column
 * positionally with the entry of {@code readTypes} at the same index.
 *
 * <p>Every reader receives a nested block factory whose callback forwards loaded blocks to
 * {@code blockLoaded} together with the column's position. When the row data cache is enabled
 * in {@code orcCacheProperties}, the reader is additionally wrapped with a caching stream
 * reader backed by {@code orcCacheStore.getRowDataCache()}.
 *
 * @param columns ORC columns to read
 * @param readTypes types to read the columns as, indexed like {@code columns}
 * @param systemMemoryContext memory context handed to every created reader
 * @param blockFactory factory from which the per-column nested block factories are derived
 * @param orcCacheStore provides the row data cache used for wrapping
 * @param orcCacheProperties tells whether the row data cache is enabled
 * @return the readers, in the same order as {@code columns}
 * @throws OrcCorruptionException declared by this method; presumably raised by reader creation
 */
private ColumnReader[] createColumnReaders(List<OrcColumn> columns, List<Type> readTypes, AggregatedMemoryContext systemMemoryContext, OrcBlockFactory blockFactory, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties) throws OrcCorruptionException {
    final int readerCount = columns.size();
    ColumnReader[] readers = new ColumnReader[readerCount];
    for (int position = 0; position < readerCount; position++) {
        // The callback lambda can only capture an effectively final copy of the loop counter.
        final int ordinal = position;
        Type targetType = readTypes.get(ordinal);
        OrcColumn orcColumn = columns.get(ordinal);
        ColumnReader plainReader = createColumnReader(
                targetType,
                orcColumn,
                systemMemoryContext,
                blockFactory.createNestedBlockFactory(block -> blockLoaded(ordinal, block)));
        // Only touch the cache store when row data caching is switched on.
        readers[ordinal] = orcCacheProperties.isRowDataCacheEnabled()
                ? ColumnReaders.wrapWithCachingStreamReader(plainReader, orcColumn, orcCacheStore.getRowDataCache())
                : plainReader;
    }
    return readers;
}
Also used : IntStream(java.util.stream.IntStream) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) ColumnReaders.createColumnReader(io.prestosql.orc.reader.ColumnReaders.createColumnReader) DATASOURCE_TOTAL_PAGES(io.prestosql.spi.HetuConstant.DATASOURCE_TOTAL_PAGES) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) DATASOURCE_FILE_PATH(io.prestosql.spi.HetuConstant.DATASOURCE_FILE_PATH) PeekingIterator(com.google.common.collect.PeekingIterator) Function(java.util.function.Function) ArrayList(java.util.ArrayList) DATASOURCE_STRIPE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_NUMBER) DATASOURCE_STRIPE_OFFSET(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET) Slices(io.airlift.slice.Slices) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) Math.toIntExact(java.lang.Math.toIntExact) Block(io.prestosql.spi.block.Block) ColumnReaders(io.prestosql.orc.reader.ColumnReaders) Properties(java.util.Properties) ImmutableMap(com.google.common.collect.ImmutableMap) DATASOURCE_FILE_MODIFICATION(io.prestosql.spi.HetuConstant.DATASOURCE_FILE_MODIFICATION) OrcType(io.prestosql.orc.metadata.OrcType) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Page(io.prestosql.spi.Page) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) DATASOURCE_INDEX_LEVEL(io.prestosql.spi.HetuConstant.DATASOURCE_INDEX_LEVEL) ColumnReader(io.prestosql.orc.reader.ColumnReader) DATASOURCE_PAGE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_PAGE_NUMBER) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) DataSize(io.airlift.units.DataSize) List(java.util.List) ClassLayout(org.openjdk.jol.info.ClassLayout) Domain(io.prestosql.spi.predicate.Domain) 
DATASOURCE_STRIPE_LENGTH(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_LENGTH) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) SplitMetadata(io.prestosql.spi.heuristicindex.SplitMetadata) Type(io.prestosql.spi.type.Type) OrcType(io.prestosql.orc.metadata.OrcType) ColumnReaders.createColumnReader(io.prestosql.orc.reader.ColumnReaders.createColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader)

Example 2 with ColumnReaders

Use of io.prestosql.orc.reader.ColumnReaders in the project hetu-core by openlookeng.

From the class OrcSelectiveRecordReader, the method createColumnReaders.

/**
 * Creates a selective reader for every file field listed in {@code includedColumns} and
 * classifies the included columns by the kind of filter attached to them.
 *
 * <p>The returned array has one slot per field of the root ORC type; slots of fields that are
 * not keys of {@code includedColumns} stay {@code null}. For an included field:
 * <ul>
 *   <li>if {@code useDataCache} is set and the row data cache is enabled, a regular
 *       {@link ColumnReader} is created and wrapped with a data-caching stream reader;</li>
 *   <li>otherwise a selective reader is created with the column's entry from {@code filters}
 *       (if any) and, only when the column is in {@code outputColumns}, its output type; that
 *       reader is wrapped with a result-caching stream reader when the row data cache is
 *       enabled.</li>
 * </ul>
 *
 * <p>Side effect: the fields {@code colReaderWithFilter}, {@code colReaderWithORFilter} and
 * {@code colReaderWithoutFilter} are replaced with fresh sets and repopulated. A column goes
 * to the first set when {@code filters} has an entry for it, else to the second when the
 * {@code disjuctFilters} field holds a non-empty entry for it. Only columns that got a reader
 * can land in the third set.
 *
 * @param fileColumns columns of the file, indexed by field position
 * @param systemMemoryContext memory context handed to every created reader
 * @param blockFactory source of nested block factories for the data-cache path
 * @param orcCacheStore provides the row data cache used for wrapping
 * @param orcCacheProperties tells whether the row data cache is enabled
 * @param predicate predicate passed to the result-caching wrapper
 * @param filters per-column filters, keyed by column index
 * @param hiveStorageTimeZone time zone passed to the selective readers
 * @param outputColumns indexes of the columns whose values must be produced
 * @param includedColumns type of every column taking part in projection or filtering
 * @param orcTypes ORC type metadata, looked up by column id
 * @param useDataCache whether the data-caching reader path may be used
 * @return the readers, indexed by field position
 * @throws OrcCorruptionException declared by this method; presumably raised by reader creation
 */
public SelectiveColumnReader[] createColumnReaders(List<OrcColumn> fileColumns, AggregatedMemoryContext systemMemoryContext, OrcBlockFactory blockFactory, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties, OrcPredicate predicate, Map<Integer, TupleDomainFilter> filters, DateTimeZone hiveStorageTimeZone, List<Integer> outputColumns, Map<Integer, Type> includedColumns, ColumnMetadata<OrcType> orcTypes, boolean useDataCache) throws OrcCorruptionException {
    int fieldCount = orcTypes.get(OrcColumnId.ROOT_COLUMN).getFieldCount();
    SelectiveColumnReader[] readers = new SelectiveColumnReader[fieldCount];

    // Every call starts from empty classification sets.
    colReaderWithFilter = new IntArraySet();
    colReaderWithORFilter = new IntArraySet();
    colReaderWithoutFilter = new IntArraySet();

    // Included columns that have not been matched to a file field yet.
    IntArraySet unhandled = new IntArraySet();
    unhandled.addAll(includedColumns.keySet());

    for (int field = 0; field < fieldCount; field++) {
        // Readers are only needed for columns that take part in projection or filtering.
        if (!includedColumns.containsKey(field)) {
            continue;
        }
        // The block-loaded lambda can only capture an effectively final copy of the counter.
        final int ordinal = field;
        OrcColumn orcColumn = fileColumns.get(ordinal);
        boolean outputRequired = outputColumns.contains(ordinal);

        SelectiveColumnReader reader;
        if (!useDataCache || !orcCacheProperties.isRowDataCacheEnabled()) {
            reader = createColumnReader(
                    orcTypes.get(orcColumn.getColumnId()),
                    orcColumn,
                    Optional.ofNullable(filters.get(ordinal)),
                    outputRequired ? Optional.of(includedColumns.get(ordinal)) : Optional.empty(),
                    hiveStorageTimeZone,
                    systemMemoryContext);
            if (orcCacheProperties.isRowDataCacheEnabled()) {
                reader = SelectiveColumnReaders.wrapWithResultCachingStreamReader(reader, orcColumn, predicate, orcCacheStore.getRowDataCache());
            }
        }
        else {
            ColumnReader cachedSource = ColumnReaders.createColumnReader(
                    includedColumns.get(ordinal),
                    orcColumn,
                    systemMemoryContext,
                    blockFactory.createNestedBlockFactory(block -> blockLoaded(ordinal, block)));
            reader = SelectiveColumnReaders.wrapWithDataCachingStreamReader(cachedSource, orcColumn, orcCacheStore.getRowDataCache());
        }
        readers[ordinal] = reader;

        // Classify the column by the kind of filter attached to it.
        if (filters.get(ordinal) != null) {
            colReaderWithFilter.add(ordinal);
        }
        else if (disjuctFilters.get(ordinal) != null && disjuctFilters.get(ordinal).size() > 0) {
            colReaderWithORFilter.add(ordinal);
        }
        else {
            colReaderWithoutFilter.add(ordinal);
        }
        unhandled.remove(ordinal);
    }

    // What is left after dropping the missing columns is expected to carry negative indexes only.
    unhandled.removeAll(missingColumns);
    for (Integer leftover : unhandled) {
        if (leftover >= 0) {
            continue;
        }
        if (filters.get(leftover) != null) {
            colReaderWithFilter.add(leftover);
        }
        else if (disjuctFilters.get(leftover) != null && disjuctFilters.get(leftover).size() > 0) {
            colReaderWithORFilter.add(leftover);
        }
    }

    // Columns missing from the file (the "alter table add column" case) are classified too.
    for (int absent : missingColumns) {
        if (filters.get(absent) != null) {
            colReaderWithFilter.add(absent);
        }
        else if (disjuctFilters.get(absent) != null && disjuctFilters.get(absent).size() > 0) {
            colReaderWithORFilter.add(absent);
        }
    }
    return readers;
}
Also used : IntStream(java.util.stream.IntStream) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) TypeNotFoundException(io.prestosql.spi.type.TypeNotFoundException) PeekingIterator(com.google.common.collect.PeekingIterator) Function(java.util.function.Function) PostScript(io.prestosql.orc.metadata.PostScript) ArrayList(java.util.ArrayList) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) Math.toIntExact(java.lang.Math.toIntExact) SelectiveColumnReaders(io.prestosql.orc.reader.SelectiveColumnReaders) Block(io.prestosql.spi.block.Block) ColumnReaders(io.prestosql.orc.reader.ColumnReaders) SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) OrcType(io.prestosql.orc.metadata.OrcType) IntArraySet(it.unimi.dsi.fastutil.ints.IntArraySet) Set(java.util.Set) Page(io.prestosql.spi.Page) IOException(java.io.IOException) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) ColumnReader(io.prestosql.orc.reader.ColumnReader) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) DataSize(io.airlift.units.DataSize) List(java.util.List) SelectiveColumnReaders.createColumnReader(io.prestosql.orc.reader.SelectiveColumnReaders.createColumnReader) Domain(io.prestosql.spi.predicate.Domain) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) Optional(java.util.Optional) BitSet(java.util.BitSet) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) 
SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) IntArraySet(it.unimi.dsi.fastutil.ints.IntArraySet) SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) SelectiveColumnReaders.createColumnReader(io.prestosql.orc.reader.SelectiveColumnReaders.createColumnReader)

Aggregations

PeekingIterator (com.google.common.collect.PeekingIterator)2 Logger (io.airlift.log.Logger)2 Slice (io.airlift.slice.Slice)2 DataSize (io.airlift.units.DataSize)2 AggregatedMemoryContext (io.prestosql.memory.context.AggregatedMemoryContext)2 ColumnMetadata (io.prestosql.orc.metadata.ColumnMetadata)2 MetadataReader (io.prestosql.orc.metadata.MetadataReader)2 OrcType (io.prestosql.orc.metadata.OrcType)2 StripeInformation (io.prestosql.orc.metadata.StripeInformation)2 ColumnStatistics (io.prestosql.orc.metadata.statistics.ColumnStatistics)2 StripeStatistics (io.prestosql.orc.metadata.statistics.StripeStatistics)2 ColumnReader (io.prestosql.orc.reader.ColumnReader)2 ColumnReaders (io.prestosql.orc.reader.ColumnReaders)2 Page (io.prestosql.spi.Page)2 Block (io.prestosql.spi.block.Block)2 IndexMetadata (io.prestosql.spi.heuristicindex.IndexMetadata)2 Domain (io.prestosql.spi.predicate.Domain)2 Type (io.prestosql.spi.type.Type)2 IOException (java.io.IOException)2 Math.toIntExact (java.lang.Math.toIntExact)2