use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.
the class TestParquetPredicateUtils method testParquetTupleDomainMap.
/**
 * Builds a NOT NULL constraint on a map-typed Hive column and checks that the
 * Parquet tuple domain derived from it is unconstrained ({@code isAll()}).
 */
@Test
public void testParquetTupleDomainMap() {
    // Hive-side column and the NOT NULL constraint placed on it.
    MapType hiveMapType = new MapType(INTEGER, INTEGER, new TypeOperators());
    HiveColumnHandle mapColumn = createBaseColumn(
            "my_map",
            0,
            HiveType.valueOf("map<int,int>"),
            hiveMapType,
            REGULAR,
            Optional.empty());
    TupleDomain<HiveColumnHandle> notNullConstraint =
            withColumnDomains(ImmutableMap.of(mapColumn, Domain.notNull(hiveMapType)));

    // Parquet file schema:
    //   optional group my_map { repeated group map { required int32 key; optional int32 value; } }
    PrimitiveType keyField = new PrimitiveType(REQUIRED, INT32, "key");
    PrimitiveType valueField = new PrimitiveType(OPTIONAL, INT32, "value");
    GroupType mapEntries = new GroupType(REPEATED, "map", keyField, valueField);
    MessageType parquetSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_map", mapEntries));

    // The same schema is passed as both arguments to getDescriptors.
    Map<List<String>, RichColumnDescriptor> descriptors = getDescriptors(parquetSchema, parquetSchema);
    TupleDomain<ColumnDescriptor> parquetDomain =
            getParquetTupleDomain(descriptors, notNullConstraint, parquetSchema, true);

    assertTrue(parquetDomain.isAll());
}
use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.
the class TestParquetPredicateUtils method testParquetTupleDomainStruct.
/**
 * Builds a NOT NULL constraint on a struct-typed Hive column and checks that the
 * Parquet tuple domain derived from it is unconstrained ({@code isAll()}).
 */
@Test
public void testParquetTupleDomainStruct() {
    // Hive-side column and the NOT NULL constraint placed on it.
    RowType structType = rowType(RowType.field("a", INTEGER), RowType.field("b", INTEGER));
    HiveColumnHandle structColumn = createBaseColumn(
            "my_struct",
            0,
            HiveType.valueOf("struct<a:int,b:int>"),
            structType,
            REGULAR,
            Optional.empty());
    TupleDomain<HiveColumnHandle> notNullConstraint =
            withColumnDomains(ImmutableMap.of(structColumn, Domain.notNull(structType)));

    // Parquet file schema:
    //   optional group my_struct { optional int32 a; optional int32 b; }
    PrimitiveType fieldA = new PrimitiveType(OPTIONAL, INT32, "a");
    PrimitiveType fieldB = new PrimitiveType(OPTIONAL, INT32, "b");
    MessageType parquetSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_struct", fieldA, fieldB));

    // The same schema is passed as both arguments to getDescriptors.
    Map<List<String>, RichColumnDescriptor> descriptors = getDescriptors(parquetSchema, parquetSchema);
    TupleDomain<ColumnDescriptor> parquetDomain =
            getParquetTupleDomain(descriptors, notNullConstraint, parquetSchema, true);

    assertTrue(parquetDomain.isAll());
}
use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.
the class TupleDomainParquetPredicate method matches.
/**
 * Tests the effective predicate against per-column statistics.
 *
 * <p>Returns {@code false} when there are no rows, when the effective predicate
 * is none, or as soon as one column's predicate domain does not overlap the
 * domain computed from that column's statistics. Columns with no predicate
 * domain, or with missing/empty statistics, are skipped.
 *
 * @param numberOfRows number of rows being evaluated
 * @param statistics statistics keyed by column descriptor
 * @param id data source identifier forwarded to {@code getDomain}
 * @return {@code false} if the predicate provably cannot match, {@code true} otherwise
 * @throws ParquetCorruptionException declared by this method; presumably raised by
 *         {@code getDomain} on inconsistent statistics (confirm against its definition)
 */
@Override
public boolean matches(long numberOfRows, Map<ColumnDescriptor, Statistics<?>> statistics, ParquetDataSourceId id)
        throws ParquetCorruptionException {
    if (numberOfRows == 0 || effectivePredicate.isNone()) {
        return false;
    }

    Map<ColumnDescriptor, Domain> domainsByColumn = effectivePredicate.getDomains()
            .orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains"));

    for (RichColumnDescriptor descriptor : columns) {
        Domain predicateDomain = domainsByColumn.get(descriptor);
        if (predicateDomain != null) {
            Statistics<?> stats = statistics.get(descriptor);
            // Only columns that actually carry statistics can rule the predicate out.
            boolean statsAvailable = stats != null && !stats.isEmpty();
            if (statsAvailable) {
                Domain statsDomain = getDomain(descriptor, predicateDomain.getType(), numberOfRows, stats, id, timeZone);
                if (!predicateDomain.overlaps(statsDomain)) {
                    return false;
                }
            }
        }
    }
    return true;
}
use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.
the class TupleDomainParquetPredicate method matches.
/**
 * Tests the effective predicate against column indexes from the given store.
 *
 * <p>Returns {@code false} when there are no rows, when the effective predicate
 * is none, or as soon as one column's predicate domain does not overlap the
 * domain computed from that column's index. Columns with no predicate domain,
 * or for which the store returns no column index, are skipped.
 *
 * @param numberOfRows number of rows being evaluated
 * @param columnIndexStore source of column indexes, looked up by column path; must not be null
 * @param id data source identifier forwarded to {@code getDomain}
 * @return {@code false} if the predicate provably cannot match, {@code true} otherwise
 * @throws ParquetCorruptionException declared by this method; presumably raised by
 *         {@code getDomain} on an inconsistent index (confirm against its definition)
 */
@Override
public boolean matches(long numberOfRows, ColumnIndexStore columnIndexStore, ParquetDataSourceId id)
        throws ParquetCorruptionException {
    requireNonNull(columnIndexStore, "columnIndexStore is null");

    if (numberOfRows == 0 || effectivePredicate.isNone()) {
        return false;
    }

    Map<ColumnDescriptor, Domain> domainsByColumn = effectivePredicate.getDomains()
            .orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains"));

    for (RichColumnDescriptor descriptor : columns) {
        Domain predicateDomain = domainsByColumn.get(descriptor);
        if (predicateDomain != null) {
            ColumnIndex index = columnIndexStore.getColumnIndex(ColumnPath.get(descriptor.getPath()));
            // Without an index for this column there is nothing to compare against.
            if (index != null) {
                Domain indexDomain = getDomain(predicateDomain.getType(), numberOfRows, index, id, descriptor, timeZone);
                if (!predicateDomain.overlaps(indexDomain)) {
                    return false;
                }
            }
        }
    }
    return true;
}
use of io.trino.parquet.RichColumnDescriptor in project trino by trinodb.
the class ParquetReader method initializeColumnReaders.
/**
 * Creates a primitive column reader for every entry in {@code columns} and
 * stores it in {@code columnReaders} at the index given by the column's id.
 */
private void initializeColumnReaders() {
    for (PrimitiveColumnIO primitiveColumn : columns) {
        // Combine the column descriptor with its primitive type into a rich descriptor.
        RichColumnDescriptor descriptor = new RichColumnDescriptor(
                primitiveColumn.getColumnDescriptor(),
                primitiveColumn.getType().asPrimitiveType());
        int readerIndex = primitiveColumn.getId();
        PrimitiveColumnReader reader = PrimitiveColumnReader.createReader(descriptor, timeZone);
        columnReaders[readerIndex] = reader;
    }
}
Aggregations