Search in sources :

Example 1 with ParquetDictionary

Use of com.facebook.presto.hive.parquet.dictionary.ParquetDictionary in the project Presto by PrestoDB.

Class TupleDomainParquetPredicate, method getDomain.

/**
 * Builds a {@link Domain} from the entries of a column's Parquet dictionary page.
 *
 * <p>Each dictionary entry is decoded into a single-value domain, a null-only domain is
 * appended, and the union of all of them is returned. The null-only domain is always
 * included (presumably because nulls are not represented in the dictionary itself —
 * confirm against the Parquet reader).
 *
 * <p>Supported (engine type, Parquet primitive type) combinations:
 * BIGINT/INT64, BIGINT/INT32, DOUBLE/DOUBLE, DOUBLE/FLOAT and varchar/BINARY.
 *
 * @param type the engine type the resulting domain is expressed in
 * @param dictionaryDescriptor the column descriptor and optional dictionary page; may be {@code null}
 * @return the union domain, or {@code null} when {@code dictionaryDescriptor} is {@code null},
 *         it carries no dictionary page, the dictionary cannot be initialised, or the
 *         type combination is not one of the supported ones
 */
@VisibleForTesting
public static Domain getDomain(Type type, ParquetDictionaryDescriptor dictionaryDescriptor) {
    if (dictionaryDescriptor == null) {
        return null;
    }
    ColumnDescriptor columnDescriptor = dictionaryDescriptor.getColumnDescriptor();
    Optional<ParquetDictionaryPage> dictionaryPage = dictionaryDescriptor.getDictionaryPage();
    if (!dictionaryPage.isPresent()) {
        return null;
    }
    // Unwrap once; the page is needed for the encoding, the dictionary and its size.
    ParquetDictionaryPage page = dictionaryPage.get();
    ParquetDictionary dictionary;
    try {
        dictionary = page.getEncoding().initDictionary(columnDescriptor, page);
    } catch (Exception ignored) {
        // OK to ignore exception when reading dictionaries: no dictionary means no domain.
        return null;
    }
    int dictionarySize = page.getDictionarySize();

    // Only the per-entry decoding differs between the supported combinations;
    // the null-only domain and the final union are shared below.
    List<Domain> domains = new ArrayList<>();
    if (type.equals(BIGINT) && columnDescriptor.getType() == PrimitiveTypeName.INT64) {
        for (int i = 0; i < dictionarySize; i++) {
            domains.add(Domain.singleValue(type, dictionary.decodeToLong(i)));
        }
    } else if (type.equals(BIGINT) && columnDescriptor.getType() == PrimitiveTypeName.INT32) {
        for (int i = 0; i < dictionarySize; i++) {
            // Widen so the value is boxed as a Long, matching the INT64 branch.
            domains.add(Domain.singleValue(type, (long) dictionary.decodeToInt(i)));
        }
    } else if (type.equals(DOUBLE) && columnDescriptor.getType() == PrimitiveTypeName.DOUBLE) {
        for (int i = 0; i < dictionarySize; i++) {
            domains.add(Domain.singleValue(type, dictionary.decodeToDouble(i)));
        }
    } else if (type.equals(DOUBLE) && columnDescriptor.getType() == PrimitiveTypeName.FLOAT) {
        for (int i = 0; i < dictionarySize; i++) {
            // Widen so the value is boxed as a Double, matching the DOUBLE branch.
            domains.add(Domain.singleValue(type, (double) dictionary.decodeToFloat(i)));
        }
    } else if (isVarcharType(type) && columnDescriptor.getType() == PrimitiveTypeName.BINARY) {
        for (int i = 0; i < dictionarySize; i++) {
            domains.add(Domain.singleValue(type, Slices.wrappedBuffer(dictionary.decodeToBinary(i).getBytes())));
        }
    } else {
        // Unsupported type combination: no domain can be derived.
        return null;
    }
    domains.add(Domain.onlyNull(type));
    return Domain.union(domains);
}
Also used: ParquetDictionaryPage(com.facebook.presto.hive.parquet.ParquetDictionaryPage) ColumnDescriptor(parquet.column.ColumnDescriptor) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ParquetDictionary(com.facebook.presto.hive.parquet.dictionary.ParquetDictionary) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Domain(com.facebook.presto.spi.predicate.Domain) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

ParquetDictionaryPage (com.facebook.presto.hive.parquet.ParquetDictionaryPage)1 ParquetDictionary (com.facebook.presto.hive.parquet.dictionary.ParquetDictionary)1 Domain (com.facebook.presto.spi.predicate.Domain)1 TupleDomain (com.facebook.presto.spi.predicate.TupleDomain)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ImmutableList (com.google.common.collect.ImmutableList)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 ColumnDescriptor (parquet.column.ColumnDescriptor)1