Search in sources :

Example 11 with RichColumnDescriptor

use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.

the class TestParquetPredicateUtils method testParquetTupleDomainMap.

/**
 * Builds a not-null predicate on a {@code map<int,int>} Hive column named {@code my_map},
 * pairs it with a Parquet file schema containing the matching optional map group, and
 * asserts that the tuple domain produced by {@code getParquetTupleDomain} is
 * unconstrained ({@code isAll()}).
 */
@Test
public void testParquetTupleDomainMap() {
    MapType intToIntMap = new MapType(INTEGER, INTEGER, new TypeOperators());
    HiveColumnHandle mapColumn = createBaseColumn(
            "my_map",
            0,
            HiveType.valueOf("map<int,int>"),
            intToIntMap,
            REGULAR,
            Optional.empty());
    TupleDomain<HiveColumnHandle> notNullPredicate = withColumnDomains(
            ImmutableMap.of(mapColumn, Domain.notNull(intToIntMap)));

    // Parquet layout: optional group my_map { repeated group map { required key; optional value } }
    PrimitiveType keyField = new PrimitiveType(REQUIRED, INT32, "key");
    PrimitiveType valueField = new PrimitiveType(OPTIONAL, INT32, "value");
    GroupType entries = new GroupType(REPEATED, "map", keyField, valueField);
    GroupType mapGroup = new GroupType(OPTIONAL, "my_map", entries);
    MessageType fileSchema = new MessageType("hive_schema", mapGroup);

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> parquetDomain = getParquetTupleDomain(descriptorsByPath, notNullPredicate, fileSchema, true);
    assertTrue(parquetDomain.isAll());
}
Also used : GroupType(org.apache.parquet.schema.GroupType) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) PrimitiveType(org.apache.parquet.schema.PrimitiveType) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) MapType(io.trino.spi.type.MapType) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) MessageType(org.apache.parquet.schema.MessageType) TypeOperators(io.trino.spi.type.TypeOperators) Test(org.testng.annotations.Test)

Example 12 with RichColumnDescriptor

use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.

the class TestParquetPredicateUtils method testParquetTupleDomainStruct.

/**
 * Builds a not-null predicate on a {@code struct<a:int,b:int>} Hive column named
 * {@code my_struct}, pairs it with a Parquet file schema containing the matching optional
 * group, and asserts that the tuple domain produced by {@code getParquetTupleDomain} is
 * unconstrained ({@code isAll()}).
 */
@Test
public void testParquetTupleDomainStruct() {
    RowType structType = rowType(RowType.field("a", INTEGER), RowType.field("b", INTEGER));
    HiveColumnHandle structColumn = createBaseColumn(
            "my_struct",
            0,
            HiveType.valueOf("struct<a:int,b:int>"),
            structType,
            REGULAR,
            Optional.empty());
    TupleDomain<HiveColumnHandle> notNullPredicate = withColumnDomains(
            ImmutableMap.of(structColumn, Domain.notNull(structType)));

    // Parquet layout: optional group my_struct { optional int32 a; optional int32 b }
    PrimitiveType fieldA = new PrimitiveType(OPTIONAL, INT32, "a");
    PrimitiveType fieldB = new PrimitiveType(OPTIONAL, INT32, "b");
    GroupType structGroup = new GroupType(OPTIONAL, "my_struct", fieldA, fieldB);
    MessageType fileSchema = new MessageType("hive_schema", structGroup);

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> parquetDomain = getParquetTupleDomain(descriptorsByPath, notNullPredicate, fileSchema, true);
    assertTrue(parquetDomain.isAll());
}
Also used : GroupType(org.apache.parquet.schema.GroupType) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) RowType(io.trino.spi.type.RowType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) MessageType(org.apache.parquet.schema.MessageType) Test(org.testng.annotations.Test)

Example 13 with RichColumnDescriptor

use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.

the class TupleDomainParquetPredicate method matches.

/**
 * Decides, from per-column statistics, whether the effective predicate may match any rows.
 *
 * <p>Returns {@code false} immediately when {@code numberOfRows} is zero or the effective
 * predicate is none. Otherwise each entry of {@code columns} that has both a predicate
 * domain and non-empty statistics is converted to a domain via {@code getDomain}; the
 * first such domain that does not overlap the predicate domain yields {@code false}.
 * Columns lacking a predicate domain or usable statistics are skipped.
 *
 * @param numberOfRows number of rows covered by the supplied statistics
 * @param statistics statistics keyed by column descriptor
 * @param id data source identifier, forwarded to {@code getDomain}
 * @return {@code true} unless some column's statistics rule the predicate out
 * @throws ParquetCorruptionException declared by this method; presumably raised by
 *         {@code getDomain} (not visible here)
 * @throws IllegalStateException if the predicate is not none yet exposes no domains
 */
@Override
public boolean matches(long numberOfRows, Map<ColumnDescriptor, Statistics<?>> statistics, ParquetDataSourceId id) throws ParquetCorruptionException {
    if (numberOfRows == 0 || effectivePredicate.isNone()) {
        return false;
    }

    Map<ColumnDescriptor, Domain> predicateDomains = effectivePredicate.getDomains()
            .orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains"));

    for (RichColumnDescriptor column : columns) {
        Domain predicateDomain = predicateDomains.get(column);
        if (predicateDomain != null) {
            Statistics<?> stats = statistics.get(column);
            // Columns without statistics cannot be used to exclude anything.
            boolean statsUsable = stats != null && !stats.isEmpty();
            if (statsUsable) {
                Domain statsDomain = getDomain(column, predicateDomain.getType(), numberOfRows, stats, id, timeZone);
                if (!predicateDomain.overlaps(statsDomain)) {
                    return false;
                }
            }
        }
    }
    return true;
}
Also used : RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain)

Example 14 with RichColumnDescriptor

use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.

the class TupleDomainParquetPredicate method matches.

/**
 * Decides, from the column index store, whether the effective predicate may match any rows.
 *
 * <p>Returns {@code false} immediately when {@code numberOfRows} is zero or the effective
 * predicate is none. Otherwise each entry of {@code columns} that has both a predicate
 * domain and a column index in the store is converted to a domain via {@code getDomain};
 * the first such domain that does not overlap the predicate domain yields {@code false}.
 * Columns lacking a predicate domain or a column index are skipped.
 *
 * @param numberOfRows number of rows under consideration
 * @param columnIndexStore source of per-column indexes; must not be null
 * @param id data source identifier, forwarded to {@code getDomain}
 * @return {@code true} unless some column's index rules the predicate out
 * @throws ParquetCorruptionException declared by this method; presumably raised by
 *         {@code getDomain} (not visible here)
 * @throws NullPointerException if {@code columnIndexStore} is null
 * @throws IllegalStateException if the predicate is not none yet exposes no domains
 */
@Override
public boolean matches(long numberOfRows, ColumnIndexStore columnIndexStore, ParquetDataSourceId id) throws ParquetCorruptionException {
    // Validate the argument first so a null store is rejected even for empty row groups.
    requireNonNull(columnIndexStore, "columnIndexStore is null");

    if (numberOfRows == 0 || effectivePredicate.isNone()) {
        return false;
    }

    Map<ColumnDescriptor, Domain> predicateDomains = effectivePredicate.getDomains()
            .orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains"));

    for (RichColumnDescriptor column : columns) {
        Domain predicateDomain = predicateDomains.get(column);
        if (predicateDomain != null) {
            ColumnPath path = ColumnPath.get(column.getPath());
            ColumnIndex index = columnIndexStore.getColumnIndex(path);
            // A column with no index cannot be used to exclude anything.
            if (index != null) {
                Domain indexDomain = getDomain(predicateDomain.getType(), numberOfRows, index, id, column, timeZone);
                if (!predicateDomain.overlaps(indexDomain)) {
                    return false;
                }
            }
        }
    }
    return true;
}
Also used : ColumnIndex(org.apache.parquet.internal.column.columnindex.ColumnIndex) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain)

Example 15 with RichColumnDescriptor

use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.

the class ParquetReader method initializeColumnReaders.

/**
 * Creates one {@code PrimitiveColumnReader} per entry of {@code columns} and stores it in
 * {@code columnReaders} at the index given by that column's id.
 */
private void initializeColumnReaders() {
    for (PrimitiveColumnIO primitiveColumn : columns) {
        RichColumnDescriptor descriptor = new RichColumnDescriptor(
                primitiveColumn.getColumnDescriptor(),
                primitiveColumn.getType().asPrimitiveType());
        PrimitiveColumnReader reader = PrimitiveColumnReader.createReader(descriptor, timeZone);
        columnReaders[primitiveColumn.getId()] = reader;
    }
}
Also used : RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) PrimitiveColumnIO(org.apache.parquet.io.PrimitiveColumnIO)

Aggregations

RichColumnDescriptor (io.trino.parquet.RichColumnDescriptor)16 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)12 ImmutableList (com.google.common.collect.ImmutableList)10 Domain (io.trino.spi.predicate.Domain)9 TupleDomain (io.trino.spi.predicate.TupleDomain)9 HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle)8 List (java.util.List)8 MessageType (org.apache.parquet.schema.MessageType)8 GroupType (org.apache.parquet.schema.GroupType)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 Field (io.trino.parquet.Field)5 RowType (io.trino.spi.type.RowType)5 Optional (java.util.Optional)5 PrimitiveType (org.apache.parquet.schema.PrimitiveType)5 Test (org.testng.annotations.Test)5 ArrayType (io.trino.spi.type.ArrayType)4 MapType (io.trino.spi.type.MapType)4 Type (io.trino.spi.type.Type)4 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)3 AggregatedMemoryContext.newSimpleAggregatedMemoryContext (io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext)3