Search in sources :

Example 1 with OrcType

use of io.prestosql.orc.metadata.OrcType in project hetu-core by openlookeng.

the class OrcReader method createOrcColumn.

/**
 * Recursively builds the {@link OrcColumn} describing the type stored under {@code columnId},
 * including one nested column per child of a STRUCT, LIST or MAP type.
 *
 * @param parentStreamName path of the enclosing column
 * @param fieldName name of this column inside its parent; when empty the parent path is reused as-is
 * @param columnId identifier used to look the type up in {@code types}
 * @param types type metadata indexed by column id
 * @param orcDataSourceId data source id handed unchanged to every created column
 * @return the column for {@code columnId} together with its nested columns (empty for other kinds)
 */
private static OrcColumn createOrcColumn(String parentStreamName, String fieldName, OrcColumnId columnId, ColumnMetadata<OrcType> types, OrcDataSourceId orcDataSourceId) {
    // Dotted path of this column; an empty field name contributes no extra segment.
    String path;
    if (fieldName.isEmpty()) {
        path = parentStreamName;
    } else {
        path = parentStreamName + "." + fieldName;
    }

    OrcType orcType = types.get(columnId);
    OrcTypeKind kind = orcType.getOrcTypeKind();

    List<OrcColumn> children;
    if (kind == OrcTypeKind.STRUCT) {
        // One child per struct field, named after the field itself.
        ImmutableList.Builder<OrcColumn> fields = ImmutableList.builder();
        int fieldCount = orcType.getFieldCount();
        for (int fieldId = 0; fieldId < fieldCount; fieldId++) {
            fields.add(createOrcColumn(path, orcType.getFieldName(fieldId), orcType.getFieldTypeIndex(fieldId), types, orcDataSourceId));
        }
        children = fields.build();
    } else if (kind == OrcTypeKind.LIST) {
        // The single element type is exposed under the fixed name "item".
        OrcColumn item = createOrcColumn(path, "item", orcType.getFieldTypeIndex(0), types, orcDataSourceId);
        children = ImmutableList.of(item);
    } else if (kind == OrcTypeKind.MAP) {
        // Child 0 is exposed as "key", child 1 as "value".
        OrcColumn key = createOrcColumn(path, "key", orcType.getFieldTypeIndex(0), types, orcDataSourceId);
        OrcColumn value = createOrcColumn(path, "value", orcType.getFieldTypeIndex(1), types, orcDataSourceId);
        children = ImmutableList.of(key, value);
    } else {
        children = ImmutableList.of();
    }
    return new OrcColumn(path, columnId, fieldName, kind, orcDataSourceId, children);
}
Also used : OrcType(io.prestosql.orc.metadata.OrcType)

Example 2 with OrcType

use of io.prestosql.orc.metadata.OrcType in project hetu-core by openlookeng.

the class StripeReader method getRowGroupStatistics.

/**
 * Gathers the statistics of a single row group for every column listed in {@code types}.
 *
 * @param types type metadata; only its size is used, to determine how many columns to report
 * @param columnIndexes row group indexes keyed by stream id; must not be null
 * @param rowGroup zero-based position of the row group inside each column's index list; must not be negative
 * @return one entry per column index in {@code [0, types.size())}; the entry is {@code null}
 *         for columns that have no row group indexes in {@code columnIndexes}
 */
private static ColumnMetadata<ColumnStatistics> getRowGroupStatistics(ColumnMetadata<OrcType> types, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup) {
    requireNonNull(columnIndexes, "columnIndexes is null");
    checkArgument(rowGroup >= 0, "rowGroup is negative");

    // Re-key the indexes by the numeric column id so they can be looked up by loop position.
    Map<Integer, List<RowGroupIndex>> indexesByColumnId = columnIndexes.entrySet().stream()
            .collect(toImmutableMap(entry -> entry.getKey().getColumnId().getId(), Entry::getValue));

    int columnCount = types.size();
    List<ColumnStatistics> perColumn = new ArrayList<>(columnCount);
    for (int columnId = 0; columnId < columnCount; columnId++) {
        List<RowGroupIndex> indexes = indexesByColumnId.get(columnId);
        // Keep the positions aligned with the column ids: absent columns get a null placeholder.
        perColumn.add(indexes == null ? null : indexes.get(rowGroup).getColumnStatistics());
    }
    return new ColumnMetadata<>(perColumn);
}
Also used : CheckpointInputStreamSource.createCheckpointStreamSource(io.prestosql.orc.stream.CheckpointInputStreamSource.createCheckpointStreamSource) OrcDataReader(io.prestosql.orc.stream.OrcDataReader) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ValueInputStreamSource(io.prestosql.orc.stream.ValueInputStreamSource) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) StripeFooter(io.prestosql.orc.metadata.StripeFooter) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) RowGroupIndex(io.prestosql.orc.metadata.RowGroupIndex) OrcInputStream(io.prestosql.orc.stream.OrcInputStream) ImmutableSet(com.google.common.collect.ImmutableSet) OrcTypeKind(io.prestosql.orc.metadata.OrcType.OrcTypeKind) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Set(java.util.Set) DICTIONARY_DATA(io.prestosql.orc.metadata.Stream.StreamKind.DICTIONARY_DATA) Checkpoints.getStreamCheckpoints(io.prestosql.orc.checkpoint.Checkpoints.getStreamCheckpoints) ZoneId(java.time.ZoneId) Preconditions.checkState(com.google.common.base.Preconditions.checkState) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) InputStreamSource(io.prestosql.orc.stream.InputStreamSource) DICTIONARY(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BLOOM_FILTER_UTF8(io.prestosql.orc.metadata.Stream.StreamKind.BLOOM_FILTER_UTF8) Entry(java.util.Map.Entry) Optional(java.util.Optional) InvalidCheckpointException(io.prestosql.orc.checkpoint.InvalidCheckpointException) DICTIONARY_V2(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind.DICTIONARY_V2) Slice(io.airlift.slice.Slice) 
OrcChunkLoader(io.prestosql.orc.stream.OrcChunkLoader) Logger(io.airlift.log.Logger) ColumnEncodingKind(io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind) DICTIONARY_COUNT(io.prestosql.orc.metadata.Stream.StreamKind.DICTIONARY_COUNT) HashMap(java.util.HashMap) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicates(com.google.common.base.Predicates) Math.toIntExact(java.lang.Math.toIntExact) LinkedHashSet(java.util.LinkedHashSet) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) ValueInputStream(io.prestosql.orc.stream.ValueInputStream) ROW_INDEX(io.prestosql.orc.metadata.Stream.StreamKind.ROW_INDEX) ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) OrcType(io.prestosql.orc.metadata.OrcType) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) Stream(io.prestosql.orc.metadata.Stream) BLOOM_FILTER(io.prestosql.orc.metadata.Stream.StreamKind.BLOOM_FILTER) ExecutionException(java.util.concurrent.ExecutionException) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) ValueStreams(io.prestosql.orc.stream.ValueStreams) OrcReader.handleCacheLoadException(io.prestosql.orc.OrcReader.handleCacheLoadException) HashableBloomFilter(io.prestosql.orc.metadata.statistics.HashableBloomFilter) InputStream(java.io.InputStream) LENGTH(io.prestosql.orc.metadata.Stream.StreamKind.LENGTH) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) RowGroupIndex(io.prestosql.orc.metadata.RowGroupIndex) ArrayList(java.util.ArrayList) List(java.util.List) 
ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Checkpoints.getDictionaryStreamCheckpoint(io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint)

Example 3 with OrcType

use of io.prestosql.orc.metadata.OrcType in project hetu-core by openlookeng.

the class ColumnWriters method createColumnWriter.

/**
 * Creates the {@link ColumnWriter} matching the ORC type kind registered for {@code columnId}.
 * LIST, MAP and STRUCT kinds recurse to build the writers of their child columns first.
 *
 * @param columnId column whose writer is requested
 * @param orcTypes ORC type metadata indexed by column id
 * @param type engine type of the column; must not be null. Its type parameters supply the
 *        child types for LIST, MAP and STRUCT
 * @param compression compression kind passed to every created writer
 * @param bufferSize buffer size passed to every created writer
 * @param stringStatisticsLimit limit passed to the writer used for CHAR, VARCHAR and STRING
 * @return a writer for the column
 * @throws IllegalArgumentException if the ORC type kind is not handled by any case below
 */
public static ColumnWriter createColumnWriter(OrcColumnId columnId, ColumnMetadata<OrcType> orcTypes, Type type, CompressionKind compression, int bufferSize, DataSize stringStatisticsLimit) {
    requireNonNull(type, "type is null");
    OrcType orcType = orcTypes.get(columnId);
    switch (orcType.getOrcTypeKind()) {
        case BOOLEAN:
            return new BooleanColumnWriter(columnId, type, compression, bufferSize);
        case FLOAT:
            return new FloatColumnWriter(columnId, type, compression, bufferSize);
        case DOUBLE:
            return new DoubleColumnWriter(columnId, type, compression, bufferSize);
        case BYTE:
            return new ByteColumnWriter(columnId, type, compression, bufferSize);
        case DATE:
            // Dates share the long writer but use a date statistics builder.
            return new LongColumnWriter(columnId, type, compression, bufferSize, DateStatisticsBuilder::new);
        case SHORT:
        case INT:
        case LONG:
            return new LongColumnWriter(columnId, type, compression, bufferSize, IntegerStatisticsBuilder::new);
        case DECIMAL:
            return new DecimalColumnWriter(columnId, type, compression, bufferSize);
        case TIMESTAMP:
            return new TimestampColumnWriter(columnId, type, compression, bufferSize);
        case BINARY:
            return new SliceDirectColumnWriter(columnId, type, compression, bufferSize, BinaryStatisticsBuilder::new);
        case CHAR:
        case VARCHAR:
        case STRING:
            return new SliceDictionaryColumnWriter(columnId, type, compression, bufferSize, stringStatisticsLimit);
        case LIST: {
            // Child 0 / type parameter 0 describe the list element.
            ColumnWriter elementWriter = createColumnWriter(orcType.getFieldTypeIndex(0), orcTypes, type.getTypeParameters().get(0), compression, bufferSize, stringStatisticsLimit);
            return new ListColumnWriter(columnId, compression, bufferSize, elementWriter);
        }
        case MAP: {
            // Child 0 / type parameter 0 describe the key, child 1 / type parameter 1 the value.
            OrcColumnId keyColumnId = orcType.getFieldTypeIndex(0);
            Type keyType = type.getTypeParameters().get(0);
            ColumnWriter keyWriter = createColumnWriter(keyColumnId, orcTypes, keyType, compression, bufferSize, stringStatisticsLimit);

            OrcColumnId valueColumnId = orcType.getFieldTypeIndex(1);
            Type valueType = type.getTypeParameters().get(1);
            ColumnWriter valueWriter = createColumnWriter(valueColumnId, orcTypes, valueType, compression, bufferSize, stringStatisticsLimit);

            return new MapColumnWriter(columnId, compression, bufferSize, keyWriter, valueWriter);
        }
        case STRUCT: {
            // Field i of the ORC struct is paired with type parameter i of the engine type.
            ImmutableList.Builder<ColumnWriter> structFieldWriters = ImmutableList.builder();
            int fieldCount = orcType.getFieldCount();
            for (int position = 0; position < fieldCount; position++) {
                structFieldWriters.add(createColumnWriter(orcType.getFieldTypeIndex(position), orcTypes, type.getTypeParameters().get(position), compression, bufferSize, stringStatisticsLimit));
            }
            return new StructColumnWriter(columnId, compression, bufferSize, structFieldWriters.build());
        }
        default:
            throw new IllegalArgumentException("Unsupported type: " + type);
    }
}
Also used : DateStatisticsBuilder(io.prestosql.orc.metadata.statistics.DateStatisticsBuilder) BinaryStatisticsBuilder(io.prestosql.orc.metadata.statistics.BinaryStatisticsBuilder) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) BinaryStatisticsBuilder(io.prestosql.orc.metadata.statistics.BinaryStatisticsBuilder) IntegerStatisticsBuilder(io.prestosql.orc.metadata.statistics.IntegerStatisticsBuilder) DateStatisticsBuilder(io.prestosql.orc.metadata.statistics.DateStatisticsBuilder) IntegerStatisticsBuilder(io.prestosql.orc.metadata.statistics.IntegerStatisticsBuilder) OrcType(io.prestosql.orc.metadata.OrcType) Type(io.prestosql.spi.type.Type) OrcType(io.prestosql.orc.metadata.OrcType)

Example 4 with OrcType

use of io.prestosql.orc.metadata.OrcType in project hetu-core by openlookeng.

the class OrcSelectiveRecordReader method createColumnReaders.

/**
 * Creates one {@link SelectiveColumnReader} per included top-level column and classifies every
 * column index into the instance sets {@code colReaderWithFilter}, {@code colReaderWithORFilter}
 * and {@code colReaderWithoutFilter}, all three of which are re-initialised on each call.
 *
 * <p>NOTE(review): {@code disjuctFilters} and {@code missingColumns} are instance members declared
 * outside this excerpt; they are presumably populated before this method runs - confirm.
 *
 * @param fileColumns columns of the file, looked up by top-level field position
 * @param systemMemoryContext memory context passed to the created readers
 * @param blockFactory factory used to derive a nested block factory on the data-cache path
 * @param orcCacheStore supplies the row data cache used when wrapping readers
 * @param orcCacheProperties consulted to decide whether readers are wrapped with caching readers
 * @param predicate passed to the result-caching wrapper
 * @param filters filter per column index; an absent entry means the column has no such filter
 * @param hiveStorageTimeZone time zone passed to the selective reader factory
 * @param outputColumns column indexes whose values are required as output
 * @param includedColumns type per column index for every column that needs a reader
 * @param orcTypes ORC type metadata; the root column's field count sizes the returned array
 * @param useDataCache when true and the row data cache is enabled, a plain column reader wrapped
 *        by a data-caching reader is used instead of a selective reader
 * @return array indexed by top-level field position; positions not present in
 *         {@code includedColumns} are left {@code null}
 * @throws OrcCorruptionException declared by this method; presumably propagated from reader creation
 */
public SelectiveColumnReader[] createColumnReaders(List<OrcColumn> fileColumns, AggregatedMemoryContext systemMemoryContext, OrcBlockFactory blockFactory, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties, OrcPredicate predicate, Map<Integer, TupleDomainFilter> filters, DateTimeZone hiveStorageTimeZone, List<Integer> outputColumns, Map<Integer, Type> includedColumns, ColumnMetadata<OrcType> orcTypes, boolean useDataCache) throws OrcCorruptionException {
    // The number of fields of the root type is the number of top-level columns.
    int fieldCount = orcTypes.get(OrcColumnId.ROOT_COLUMN).getFieldCount();
    SelectiveColumnReader[] columnReaders = new SelectiveColumnReader[fieldCount];
    // Start from empty classification sets on every invocation.
    colReaderWithFilter = new IntArraySet();
    colReaderWithORFilter = new IntArraySet();
    colReaderWithoutFilter = new IntArraySet();
    // Tracks included column indexes that have not been handled by the main loop below.
    IntArraySet remainingColumns = new IntArraySet();
    remainingColumns.addAll(includedColumns.keySet());
    for (int i = 0; i < fieldCount; i++) {
        // Create a column reader only for columns that are part of the projection or a filter.
        if (includedColumns.containsKey(i)) {
            // Separate local copy of the index, which is what the lambda below captures.
            int columnIndex = i;
            OrcColumn column = fileColumns.get(columnIndex);
            boolean outputRequired = outputColumns.contains(i);
            SelectiveColumnReader columnReader = null;
            if (useDataCache && orcCacheProperties.isRowDataCacheEnabled()) {
                // Data-cache path: build a regular column reader and wrap it with the data-caching reader.
                ColumnReader cr = ColumnReaders.createColumnReader(includedColumns.get(i), column, systemMemoryContext, blockFactory.createNestedBlockFactory(block -> blockLoaded(columnIndex, block)));
                columnReader = SelectiveColumnReaders.wrapWithDataCachingStreamReader(cr, column, orcCacheStore.getRowDataCache());
            } else {
                // Selective path: the output type is supplied only when the column must be returned.
                columnReader = createColumnReader(orcTypes.get(column.getColumnId()), column, Optional.ofNullable(filters.get(i)), outputRequired ? Optional.of(includedColumns.get(i)) : Optional.empty(), hiveStorageTimeZone, systemMemoryContext);
                if (orcCacheProperties.isRowDataCacheEnabled()) {
                    columnReader = SelectiveColumnReaders.wrapWithResultCachingStreamReader(columnReader, column, predicate, orcCacheStore.getRowDataCache());
                }
            }
            columnReaders[columnIndex] = columnReader;
            // Classify the column: an entry in filters takes precedence over a non-empty disjuctFilters entry.
            if (filters.get(i) != null) {
                colReaderWithFilter.add(columnIndex);
            } else if (disjuctFilters.get(i) != null && disjuctFilters.get(i).size() > 0) {
                colReaderWithORFilter.add(columnIndex);
            } else {
                colReaderWithoutFilter.add(columnIndex);
            }
            remainingColumns.remove(columnIndex);
        }
    }
    /* Handle included column indexes that the loop above did not cover; after removing the missing columns only negative indexes are processed. */
    remainingColumns.removeAll(missingColumns);
    for (Integer col : remainingColumns) {
        if (col < 0) {
            /* Expected to always be true at this point (original author's note). */
            // No reader exists for these indexes; they are only recorded in the filter sets.
            if (filters.get(col) != null) {
                colReaderWithFilter.add(col);
            } else if (disjuctFilters.get(col) != null && disjuctFilters.get(col).size() > 0) {
                colReaderWithORFilter.add(col);
            }
        }
    }
    // Missing columns (the "alter table add column" case per the original note) are classified by filter as well.
    for (int missingColumn : missingColumns) {
        if (filters.get(missingColumn) != null) {
            colReaderWithFilter.add(missingColumn);
        } else if (disjuctFilters.get(missingColumn) != null && disjuctFilters.get(missingColumn).size() > 0) {
            colReaderWithORFilter.add(missingColumn);
        }
    }
    return columnReaders;
}
Also used : IntStream(java.util.stream.IntStream) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) TypeNotFoundException(io.prestosql.spi.type.TypeNotFoundException) PeekingIterator(com.google.common.collect.PeekingIterator) Function(java.util.function.Function) PostScript(io.prestosql.orc.metadata.PostScript) ArrayList(java.util.ArrayList) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) Math.toIntExact(java.lang.Math.toIntExact) SelectiveColumnReaders(io.prestosql.orc.reader.SelectiveColumnReaders) Block(io.prestosql.spi.block.Block) ColumnReaders(io.prestosql.orc.reader.ColumnReaders) SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) OrcType(io.prestosql.orc.metadata.OrcType) IntArraySet(it.unimi.dsi.fastutil.ints.IntArraySet) Set(java.util.Set) Page(io.prestosql.spi.Page) IOException(java.io.IOException) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) ColumnReader(io.prestosql.orc.reader.ColumnReader) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) DataSize(io.airlift.units.DataSize) List(java.util.List) SelectiveColumnReaders.createColumnReader(io.prestosql.orc.reader.SelectiveColumnReaders.createColumnReader) Domain(io.prestosql.spi.predicate.Domain) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) Optional(java.util.Optional) BitSet(java.util.BitSet) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) 
SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) IntArraySet(it.unimi.dsi.fastutil.ints.IntArraySet) SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) SelectiveColumnReaders.createColumnReader(io.prestosql.orc.reader.SelectiveColumnReaders.createColumnReader)

Aggregations

OrcType (io.prestosql.orc.metadata.OrcType)4 OrcColumnId (io.prestosql.orc.metadata.OrcColumnId)3 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)2 Logger (io.airlift.log.Logger)2 Slice (io.airlift.slice.Slice)2 AggregatedMemoryContext (io.prestosql.memory.context.AggregatedMemoryContext)2 ColumnMetadata (io.prestosql.orc.metadata.ColumnMetadata)2 MetadataReader (io.prestosql.orc.metadata.MetadataReader)2 StripeInformation (io.prestosql.orc.metadata.StripeInformation)2 ColumnStatistics (io.prestosql.orc.metadata.statistics.ColumnStatistics)2 Type (io.prestosql.spi.type.Type)2 Preconditions.checkState (com.google.common.base.Preconditions.checkState)1 Predicates (com.google.common.base.Predicates)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Maps (com.google.common.collect.Maps)1 PeekingIterator (com.google.common.collect.PeekingIterator)1 UncheckedExecutionException (com.google.common.util.concurrent.UncheckedExecutionException)1