Search in sources :

Example 11 with RichColumnDescriptor

use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.

the class ParquetReader method initializeColumnReaders.

/**
 * Populates {@code columnReaders} with one reader per entry in {@code columns},
 * indexed by the column's id.
 *
 * <p>When {@code enableVerification} is set, a second reader for the same
 * descriptor is stored in {@code verificationColumnReaders} at the same index;
 * it is created with {@code false} where the primary reader receives
 * {@code batchReadEnabled}.
 */
private void initializeColumnReaders() {
    for (PrimitiveColumnIO primitiveColumn : columns) {
        RichColumnDescriptor descriptor = new RichColumnDescriptor(
                primitiveColumn.getColumnDescriptor(),
                primitiveColumn.getType().asPrimitiveType());
        int readerIndex = primitiveColumn.getId();

        columnReaders[readerIndex] = ColumnReaderFactory.createReader(descriptor, batchReadEnabled);
        if (!enableVerification) {
            continue;
        }
        // Verification reader always gets `false` for the second argument.
        verificationColumnReaders[readerIndex] = ColumnReaderFactory.createReader(descriptor, false);
    }
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO)

Example 12 with RichColumnDescriptor

use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.

the class PredicateUtils method dictionaryPredicatesMatch.

/**
 * Tests the predicate against the dictionary of each eligible column chunk in a block.
 *
 * <p>A chunk is examined only when a descriptor is registered for its path,
 * {@code isOnlyDictionaryEncodingPages} accepts it, and {@code isColumnPredicate}
 * accepts its descriptor for the given tuple domain. For such a chunk,
 * {@code getTotalSize()} bytes are read starting at {@code getStartingPos()} and
 * handed to {@code readDictionaryPage}.
 *
 * @param parquetPredicate predicate evaluated against each dictionary descriptor
 * @param blockMetadata metadata of the block whose column chunks are inspected
 * @param dataSource source the chunk bytes are read from
 * @param descriptorsByPath column descriptors keyed by column path
 * @param parquetTupleDomain tuple domain passed to {@code isColumnPredicate}
 * @return {@code false} as soon as the predicate rejects one dictionary, {@code true} otherwise
 */
private static boolean dictionaryPredicatesMatch(Predicate parquetPredicate, BlockMetaData blockMetadata, ParquetDataSource dataSource, Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<ColumnDescriptor> parquetTupleDomain) {
    for (ColumnChunkMetaData chunk : blockMetadata.getColumns()) {
        List<String> chunkPath = Arrays.asList(chunk.getPath().toArray());
        RichColumnDescriptor chunkDescriptor = descriptorsByPath.get(chunkPath);
        if (chunkDescriptor == null) {
            continue;
        }
        if (!isOnlyDictionaryEncodingPages(chunk) || !isColumnPredicate(chunkDescriptor, parquetTupleDomain)) {
            continue;
        }

        byte[] chunkBytes = new byte[toIntExact(chunk.getTotalSize())];
        dataSource.readFully(chunk.getStartingPos(), chunkBytes);
        DictionaryDescriptor dictionary = new DictionaryDescriptor(chunkDescriptor, readDictionaryPage(chunkBytes, chunk.getCodec()));

        // Stop at the first rejection: the block is already filtered out, so the
        // remaining dictionaries do not need to be read.
        if (!parquetPredicate.matches(dictionary)) {
            return false;
        }
    }
    return true;
}
Also used : ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor)

Example 13 with RichColumnDescriptor

use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.

the class DeltaPageSourceProvider method getParquetTupleDomain.

/**
 * Re-keys a predicate over Delta column handles by Parquet column descriptor.
 *
 * <p>Each handle is resolved to a column path: the path derived from its pushed-down
 * subfield when {@code isPushedDownSubfield} holds, otherwise a single-element path
 * containing the handle's name. Entries whose path has no descriptor in
 * {@code descriptorsByPath} are left out of the result.
 *
 * @param descriptorsByPath column descriptors keyed by column path
 * @param effectivePredicate predicate keyed by Delta column handle
 * @return {@code TupleDomain.none()} when the input is none; otherwise a tuple domain
 *         holding the domains of the handles that resolved to a descriptor
 */
public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<DeltaColumnHandle> effectivePredicate) {
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }

    ImmutableMap.Builder<ColumnDescriptor, Domain> domainsByDescriptor = ImmutableMap.builder();
    for (Map.Entry<DeltaColumnHandle, Domain> handleAndDomain : effectivePredicate.getDomains().get().entrySet()) {
        DeltaColumnHandle handle = handleAndDomain.getKey();

        List<String> columnPath;
        if (isPushedDownSubfield(handle)) {
            columnPath = columnPathFromSubfield(getPushedDownSubfield(handle));
        }
        else {
            columnPath = ImmutableList.of(handle.getName());
        }

        RichColumnDescriptor resolved = descriptorsByPath.get(columnPath);
        if (resolved == null) {
            // No descriptor for this path: the entry is not carried over.
            continue;
        }
        domainsByDescriptor.put(resolved, handleAndDomain.getValue());
    }
    return TupleDomain.withColumnDomains(domainsByDescriptor.build());
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) DeltaColumnHandle.getPushedDownSubfield(com.facebook.presto.delta.DeltaColumnHandle.getPushedDownSubfield) DeltaColumnHandle.isPushedDownSubfield(com.facebook.presto.delta.DeltaColumnHandle.isPushedDownSubfield) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) Subfield(com.facebook.presto.common.Subfield)

Example 14 with RichColumnDescriptor

use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.

the class TestParquetPredicateUtils method testParquetTupleDomainMap.

/**
 * Builds a not-null domain on a {@code map<int,int>} column and a file schema in which
 * that column is an optional group wrapping a repeated key/value group, then asserts
 * that the Parquet tuple domain derived from them contains no column domains.
 */
@Test
public void testParquetTupleDomainMap() {
    HiveColumnHandle columnHandle = new HiveColumnHandle(
            "my_map",
            HiveType.valueOf("map<int,int>"),
            parseTypeSignature(StandardTypes.MAP),
            0,
            REGULAR,
            Optional.empty(),
            Optional.empty());
    MapType mapType = new MapType(
            INTEGER,
            INTEGER,
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"));
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(mapType)));

    // File schema: optional group "my_map" -> repeated group "map" -> (key, value).
    PrimitiveType keyField = new PrimitiveType(REQUIRED, INT32, "key");
    PrimitiveType valueField = new PrimitiveType(OPTIONAL, INT32, "value");
    GroupType mapEntries = new GroupType(REPEATED, "map", keyField, valueField);
    MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_map", mapEntries));

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);

    assertTrue(tupleDomain.getDomains().get().isEmpty());
}
Also used : GroupType(org.apache.parquet.schema.GroupType) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) PrimitiveType(org.apache.parquet.schema.PrimitiveType) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) MapType(com.facebook.presto.common.type.MapType) MessageType(org.apache.parquet.schema.MessageType) Test(org.testng.annotations.Test)

Example 15 with RichColumnDescriptor

use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.

the class TestParquetPredicateUtils method testParquetTupleDomainPrimitiveArray.

/**
 * Builds a not-null domain on an {@code array<int>} column and a file schema in which
 * that column is an optional group wrapping a repeated group with a single element
 * field, then asserts that the Parquet tuple domain derived from them contains no
 * column domains.
 */
@Test
public void testParquetTupleDomainPrimitiveArray() {
    HiveColumnHandle columnHandle = new HiveColumnHandle(
            "my_array",
            HiveType.valueOf("array<int>"),
            parseTypeSignature(StandardTypes.ARRAY),
            0,
            REGULAR,
            Optional.empty(),
            Optional.empty());
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(new ArrayType(INTEGER))));

    // File schema: optional group "my_array" -> repeated group "bag" -> array_element.
    PrimitiveType elementField = new PrimitiveType(OPTIONAL, INT32, "array_element");
    GroupType elementBag = new GroupType(REPEATED, "bag", elementField);
    MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_array", elementBag));

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);

    assertTrue(tupleDomain.getDomains().get().isEmpty());
}
Also used : ArrayType(com.facebook.presto.common.type.ArrayType) GroupType(org.apache.parquet.schema.GroupType) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) PrimitiveType(org.apache.parquet.schema.PrimitiveType) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) MessageType(org.apache.parquet.schema.MessageType) Test(org.testng.annotations.Test)

Aggregations

RichColumnDescriptor (com.facebook.presto.parquet.RichColumnDescriptor)16 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)12 Domain (com.facebook.presto.common.predicate.Domain)9 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)9 ImmutableList (com.google.common.collect.ImmutableList)9 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)8 List (java.util.List)8 MessageType (org.apache.parquet.schema.MessageType)8 ImmutableMap (com.google.common.collect.ImmutableMap)6 Subfield (com.facebook.presto.common.Subfield)4 Type (com.facebook.presto.common.type.Type)4 Field (com.facebook.presto.parquet.Field)4 ParquetTypeUtils.columnPathFromSubfield (com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield)4 GroupType (org.apache.parquet.schema.GroupType)4 TypeManager (com.facebook.presto.common.type.TypeManager)3 PrimitiveType (org.apache.parquet.schema.PrimitiveType)3 Test (org.testng.annotations.Test)3 RuntimeStats (com.facebook.presto.common.RuntimeStats)2 MapType (com.facebook.presto.common.type.MapType)2 RowType (com.facebook.presto.common.type.RowType)2