Search in sources :

Example 6 with ColumnMetadata

use of io.prestosql.orc.metadata.ColumnMetadata in project hetu-core by openlookeng.

In class OrcSelectiveRecordReader, method createColumnReaders.

/**
 * Creates a selective column reader for every included top-level field and
 * rebuilds the {@code colReaderWithFilter}, {@code colReaderWithORFilter} and
 * {@code colReaderWithoutFilter} index sets as a side effect.
 *
 * <p>The returned array has one slot per field of the ORC root type; slots for
 * fields that are not keys of {@code includedColumns} stay {@code null}.
 *
 * @param fileColumns columns of the file, looked up by field index
 * @param systemMemoryContext memory context handed to the created readers
 * @param blockFactory source of the nested block factory used by the data-cache path
 * @param orcCacheStore provides the row data cache used to wrap readers
 * @param orcCacheProperties tells whether the row data cache is enabled
 * @param predicate predicate passed to the result-caching wrapper
 * @param filters per-column filters; a non-null entry puts the column in {@code colReaderWithFilter}
 * @param hiveStorageTimeZone time zone forwarded to the selective reader factory
 * @param outputColumns field indexes that must produce output
 * @param includedColumns type of every column taking part in the read, keyed by column index
 * @param orcTypes ORC type metadata; the root entry supplies the field count
 * @param useDataCache when true (and the row data cache is enabled) readers are built on the data-caching path
 * @return the readers, indexed by field index
 * @throws OrcCorruptionException declared by the original signature; presumably raised by reader creation
 */
public SelectiveColumnReader[] createColumnReaders(List<OrcColumn> fileColumns, AggregatedMemoryContext systemMemoryContext, OrcBlockFactory blockFactory, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties, OrcPredicate predicate, Map<Integer, TupleDomainFilter> filters, DateTimeZone hiveStorageTimeZone, List<Integer> outputColumns, Map<Integer, Type> includedColumns, ColumnMetadata<OrcType> orcTypes, boolean useDataCache) throws OrcCorruptionException {
    int fieldCount = orcTypes.get(OrcColumnId.ROOT_COLUMN).getFieldCount();
    SelectiveColumnReader[] readers = new SelectiveColumnReader[fieldCount];
    colReaderWithFilter = new IntArraySet();
    colReaderWithORFilter = new IntArraySet();
    colReaderWithoutFilter = new IntArraySet();

    // Every included column starts out unresolved; the ones that get a reader are struck off below.
    IntArraySet unresolvedColumns = new IntArraySet();
    unresolvedColumns.addAll(includedColumns.keySet());

    for (int ordinal = 0; ordinal < fieldCount; ordinal++) {
        // Readers are only needed for columns that take part in the projection or a filter.
        if (!includedColumns.containsKey(ordinal)) {
            continue;
        }
        // Effectively-final copy so the block-loaded callback can capture it.
        final int fieldIndex = ordinal;
        OrcColumn orcColumn = fileColumns.get(fieldIndex);
        boolean projected = outputColumns.contains(fieldIndex);

        SelectiveColumnReader reader;
        if (useDataCache && orcCacheProperties.isRowDataCacheEnabled()) {
            // Data-cache path: a plain column reader wrapped by the data-caching stream reader.
            ColumnReader plainReader = ColumnReaders.createColumnReader(includedColumns.get(fieldIndex), orcColumn, systemMemoryContext, blockFactory.createNestedBlockFactory(block -> blockLoaded(fieldIndex, block)));
            reader = SelectiveColumnReaders.wrapWithDataCachingStreamReader(plainReader, orcColumn, orcCacheStore.getRowDataCache());
        } else {
            OrcType orcType = orcTypes.get(orcColumn.getColumnId());
            Optional<TupleDomainFilter> columnFilter = Optional.ofNullable(filters.get(fieldIndex));
            // An output type is only supplied when the column is actually projected.
            Optional<Type> outputType = projected ? Optional.of(includedColumns.get(fieldIndex)) : Optional.empty();
            reader = createColumnReader(orcType, orcColumn, columnFilter, outputType, hiveStorageTimeZone, systemMemoryContext);
            if (orcCacheProperties.isRowDataCacheEnabled()) {
                reader = SelectiveColumnReaders.wrapWithResultCachingStreamReader(reader, orcColumn, predicate, orcCacheStore.getRowDataCache());
            }
        }
        readers[fieldIndex] = reader;

        if (!trackFilteredColumn(filters, fieldIndex)) {
            colReaderWithoutFilter.add(fieldIndex);
        }
        unresolvedColumns.remove(fieldIndex);
    }

    // Of the columns that received no reader, drop the missing ones (handled separately below)
    // and register the negative-index ones that carry a filter.
    unresolvedColumns.removeAll(missingColumns);
    for (int column : unresolvedColumns) {
        // The original author notes this condition is expected to always hold here.
        if (column < 0) {
            trackFilteredColumn(filters, column);
        }
    }

    // "ALTER ... ADD COLUMN" case: missing columns have no reader but may still carry a filter.
    for (int missingColumn : missingColumns) {
        trackFilteredColumn(filters, missingColumn);
    }
    return readers;
}

/**
 * Adds {@code columnIndex} to {@code colReaderWithFilter} when {@code filters} has an entry
 * for it, otherwise to {@code colReaderWithORFilter} when {@code disjuctFilters} holds a
 * non-empty entry for it.
 *
 * @return {@code true} if the column was added to either set, {@code false} if it has no filter
 */
private boolean trackFilteredColumn(Map<Integer, TupleDomainFilter> filters, int columnIndex) {
    if (filters.get(columnIndex) != null) {
        colReaderWithFilter.add(columnIndex);
        return true;
    }
    if (disjuctFilters.get(columnIndex) != null && disjuctFilters.get(columnIndex).size() > 0) {
        colReaderWithORFilter.add(columnIndex);
        return true;
    }
    return false;
}
Also used : IntStream(java.util.stream.IntStream) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) TypeNotFoundException(io.prestosql.spi.type.TypeNotFoundException) PeekingIterator(com.google.common.collect.PeekingIterator) Function(java.util.function.Function) PostScript(io.prestosql.orc.metadata.PostScript) ArrayList(java.util.ArrayList) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) Math.toIntExact(java.lang.Math.toIntExact) SelectiveColumnReaders(io.prestosql.orc.reader.SelectiveColumnReaders) Block(io.prestosql.spi.block.Block) ColumnReaders(io.prestosql.orc.reader.ColumnReaders) SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) OrcType(io.prestosql.orc.metadata.OrcType) IntArraySet(it.unimi.dsi.fastutil.ints.IntArraySet) Set(java.util.Set) Page(io.prestosql.spi.Page) IOException(java.io.IOException) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) ColumnReader(io.prestosql.orc.reader.ColumnReader) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) DataSize(io.airlift.units.DataSize) List(java.util.List) SelectiveColumnReaders.createColumnReader(io.prestosql.orc.reader.SelectiveColumnReaders.createColumnReader) Domain(io.prestosql.spi.predicate.Domain) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) Optional(java.util.Optional) BitSet(java.util.BitSet) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) 
SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) IntArraySet(it.unimi.dsi.fastutil.ints.IntArraySet) SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) SelectiveColumnReaders.createColumnReader(io.prestosql.orc.reader.SelectiveColumnReaders.createColumnReader)

Example 7 with ColumnMetadata

use of io.prestosql.orc.metadata.ColumnMetadata in project hetu-core by openlookeng.

In class StripeReader, method createRowGroups.

/**
 * Builds a {@code RowGroup} for each selected row group of a stripe.
 *
 * <p>For every id in {@code selectedRowGroups} this resolves the stream checkpoints,
 * derives the row offset and row count of the group from {@code rowsInRowGroup}, and
 * sums the minimum average value size reported by each column's row-group index.
 *
 * @param rowsInStripe total number of rows in the stripe
 * @param streams stripe streams, keyed by stream id
 * @param valueStreams value input streams, keyed by stream id
 * @param columnIndexes row-group indexes per stream; each list is indexed by row group id
 * @param selectedRowGroups ids of the row groups to materialise
 * @param encodings column encodings used when resolving checkpoints
 * @return the row groups, in the iteration order of {@code selectedRowGroups}
 * @throws InvalidCheckpointException declared by the original signature; presumably raised while resolving checkpoints
 */
private List<RowGroup> createRowGroups(int rowsInStripe, Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, Map<StreamId, List<RowGroupIndex>> columnIndexes, Set<Integer> selectedRowGroups, ColumnMetadata<ColumnEncoding> encodings) throws InvalidCheckpointException {
    ImmutableList.Builder<RowGroup> groups = ImmutableList.builder();
    for (int groupId : selectedRowGroups) {
        Map<StreamId, StreamCheckpoint> groupCheckpoints = getStreamCheckpoints(includedOrcColumnIds, types, decompressor.isPresent(), groupId, encodings, streams, columnIndexes);

        int firstRow = groupId * rowsInRowGroup;
        // The last group of a stripe may hold fewer rows than a full row group.
        int rowCount = Math.min(rowsInStripe - firstRow, rowsInRowGroup);

        // Sum the per-column minimum average value sizes recorded for this row group.
        long minAverageRowBytes = 0;
        for (List<RowGroupIndex> indexes : columnIndexes.values()) {
            minAverageRowBytes += indexes.get(groupId).getColumnStatistics().getMinAverageValueSizeInBytes();
        }

        groups.add(createRowGroup(groupId, firstRow, rowCount, minAverageRowBytes, valueStreams, groupCheckpoints));
    }
    return groups.build();
}
Also used : CheckpointInputStreamSource.createCheckpointStreamSource(io.prestosql.orc.stream.CheckpointInputStreamSource.createCheckpointStreamSource) OrcDataReader(io.prestosql.orc.stream.OrcDataReader) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ValueInputStreamSource(io.prestosql.orc.stream.ValueInputStreamSource) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) StripeFooter(io.prestosql.orc.metadata.StripeFooter) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) RowGroupIndex(io.prestosql.orc.metadata.RowGroupIndex) OrcInputStream(io.prestosql.orc.stream.OrcInputStream) ImmutableSet(com.google.common.collect.ImmutableSet) OrcTypeKind(io.prestosql.orc.metadata.OrcType.OrcTypeKind) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Set(java.util.Set) DICTIONARY_DATA(io.prestosql.orc.metadata.Stream.StreamKind.DICTIONARY_DATA) Checkpoints.getStreamCheckpoints(io.prestosql.orc.checkpoint.Checkpoints.getStreamCheckpoints) ZoneId(java.time.ZoneId) Preconditions.checkState(com.google.common.base.Preconditions.checkState) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) InputStreamSource(io.prestosql.orc.stream.InputStreamSource) DICTIONARY(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BLOOM_FILTER_UTF8(io.prestosql.orc.metadata.Stream.StreamKind.BLOOM_FILTER_UTF8) Entry(java.util.Map.Entry) Optional(java.util.Optional) InvalidCheckpointException(io.prestosql.orc.checkpoint.InvalidCheckpointException) DICTIONARY_V2(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY_V2) Slice(io.airlift.slice.Slice) 
OrcChunkLoader(io.prestosql.orc.stream.OrcChunkLoader) Logger(io.airlift.log.Logger) ColumnEncodingKind(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind) DICTIONARY_COUNT(io.prestosql.orc.metadata.Stream.StreamKind.DICTIONARY_COUNT) HashMap(java.util.HashMap) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicates(com.google.common.base.Predicates) Math.toIntExact(java.lang.Math.toIntExact) LinkedHashSet(java.util.LinkedHashSet) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) ValueInputStream(io.prestosql.orc.stream.ValueInputStream) ROW_INDEX(io.prestosql.orc.metadata.Stream.StreamKind.ROW_INDEX) ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) OrcType(io.prestosql.orc.metadata.OrcType) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) Stream(io.prestosql.orc.metadata.Stream) BLOOM_FILTER(io.prestosql.orc.metadata.Stream.StreamKind.BLOOM_FILTER) ExecutionException(java.util.concurrent.ExecutionException) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) ValueStreams(io.prestosql.orc.stream.ValueStreams) OrcReader.handleCacheLoadException(io.prestosql.orc.OrcReader.handleCacheLoadException) HashableBloomFilter(io.prestosql.orc.metadata.statistics.HashableBloomFilter) InputStream(java.io.InputStream) LENGTH(io.prestosql.orc.metadata.Stream.StreamKind.LENGTH) ImmutableList(com.google.common.collect.ImmutableList) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint) 
Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint)

Aggregations

ColumnMetadata (io.prestosql.orc.metadata.ColumnMetadata)7 ColumnStatistics (io.prestosql.orc.metadata.statistics.ColumnStatistics)6 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)5 Logger (io.airlift.log.Logger)5 Slice (io.airlift.slice.Slice)5 OrcColumnId (io.prestosql.orc.metadata.OrcColumnId)5 OrcType (io.prestosql.orc.metadata.OrcType)5 StripeInformation (io.prestosql.orc.metadata.StripeInformation)5 Preconditions.checkState (com.google.common.base.Preconditions.checkState)4 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableSet (com.google.common.collect.ImmutableSet)4 ColumnEncoding (io.prestosql.orc.metadata.ColumnEncoding)4 Stream (io.prestosql.orc.metadata.Stream)4 StripeFooter (io.prestosql.orc.metadata.StripeFooter)4 IOException (java.io.IOException)4 Math.toIntExact (java.lang.Math.toIntExact)4 ArrayList (java.util.ArrayList)4 List (java.util.List)4 Map (java.util.Map)4 Objects.requireNonNull (java.util.Objects.requireNonNull)4