Search in sources :

Example 6 with ColumnDescriptor

Use of parquet.column.ColumnDescriptor in project presto by prestodb.

From the class ParquetPredicateUtils, method getDictionariesByColumnOrdinal.

/**
 * Collects dictionary descriptors for the column chunks of a row group, keyed by
 * the chunk's position in {@code blockMetadata.getColumns()}.
 *
 * <p>A chunk gets an entry only when a requested column (a) is accepted by
 * {@code isColumnPredicate} for {@code effectivePredicate}, (b) has the same
 * path as the chunk, and (c) passes {@code isOnlyDictionaryEncodingPages} on
 * the chunk's encodings. For the first requested column that matches, the
 * chunk's bytes ({@code getTotalSize()} bytes starting at
 * {@code getStartingPos()}) are read from {@code dataSource} and handed to
 * {@code readDictionaryPage}.
 *
 * @param blockMetadata metadata of the row group whose column chunks are inspected
 * @param dataSource source the column chunk bytes are read from
 * @param requestedSchema schema whose columns are matched against the chunks
 * @param effectivePredicate predicate passed to {@code isColumnPredicate}
 * @return immutable map from column-chunk ordinal to its dictionary descriptor;
 *         chunks that did not match, or whose read threw {@link IOException},
 *         have no entry
 */
private static Map<Integer, ParquetDictionaryDescriptor> getDictionariesByColumnOrdinal(BlockMetaData blockMetadata, ParquetDataSource dataSource, MessageType requestedSchema, TupleDomain<HiveColumnHandle> effectivePredicate) {
    ImmutableMap.Builder<Integer, ParquetDictionaryDescriptor> dictionaries = ImmutableMap.builder();
    // Loop-invariant: fetch the requested columns once rather than twice per
    // inner iteration for every column chunk.
    Iterable<ColumnDescriptor> requestedColumns = requestedSchema.getColumns();
    int ordinal = 0;
    for (ColumnChunkMetaData columnChunkMetaData : blockMetadata.getColumns()) {
        for (ColumnDescriptor columnDescriptor : requestedColumns) {
            if (isColumnPredicate(columnDescriptor, effectivePredicate) && columnChunkMetaData.getPath().equals(ColumnPath.get(columnDescriptor.getPath())) && isOnlyDictionaryEncodingPages(columnChunkMetaData.getEncodings())) {
                try {
                    // NOTE(review): toIntExact is presumably Math.toIntExact, which throws
                    // ArithmeticException for sizes above Integer.MAX_VALUE; that is not caught here.
                    int totalSize = toIntExact(columnChunkMetaData.getTotalSize());
                    byte[] buffer = new byte[totalSize];
                    dataSource.readFully(columnChunkMetaData.getStartingPos(), buffer);
                    Optional<ParquetDictionaryPage> dictionaryPage = readDictionaryPage(buffer, columnChunkMetaData.getCodec());
                    dictionaries.put(ordinal, new ParquetDictionaryDescriptor(columnDescriptor, dictionaryPage));
                } catch (IOException ignored) {
                    // Deliberately swallowed: a failed read leaves this ordinal without a
                    // dictionary entry instead of failing the whole lookup.
                }
                // Stop at the first matching requested column, whether or not the read succeeded.
                break;
            }
        }
        ordinal++;
    }
    return dictionaries.build();
}
Also used : ParquetDictionaryPage(com.facebook.presto.hive.parquet.ParquetDictionaryPage) ColumnChunkMetaData(parquet.hadoop.metadata.ColumnChunkMetaData) ColumnDescriptor(parquet.column.ColumnDescriptor) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap)

Aggregations

ColumnDescriptor (parquet.column.ColumnDescriptor)6 PrimitiveColumnIO (parquet.io.PrimitiveColumnIO)3 ParquetDictionaryPage (com.facebook.presto.hive.parquet.ParquetDictionaryPage)2 RichColumnDescriptor (com.facebook.presto.hive.parquet.RichColumnDescriptor)2 ParquetDictionary (com.facebook.presto.hive.parquet.dictionary.ParquetDictionary)1 AggregatedMemoryContext (com.facebook.presto.hive.parquet.memory.AggregatedMemoryContext)1 ParquetReader (com.facebook.presto.hive.parquet.reader.ParquetReader)1 Block (com.facebook.presto.spi.block.Block)1 Domain (com.facebook.presto.spi.predicate.Domain)1 TupleDomain (com.facebook.presto.spi.predicate.TupleDomain)1 TypeManager (com.facebook.presto.spi.type.TypeManager)1 TypeRegistry (com.facebook.presto.type.TypeRegistry)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)1 FileSystem (org.apache.hadoop.fs.FileSystem)1