use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
the class TupleDomainParquetPredicate method getDomain.
@VisibleForTesting
public static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescriptor) {
if (dictionaryDescriptor == null) {
return Domain.all(type);
}
ColumnDescriptor columnDescriptor = dictionaryDescriptor.getColumnDescriptor();
Optional<DictionaryPage> dictionaryPage = dictionaryDescriptor.getDictionaryPage();
if (!dictionaryPage.isPresent()) {
return Domain.all(type);
}
Dictionary dictionary;
try {
dictionary = dictionaryPage.get().getEncoding().initDictionary(columnDescriptor, dictionaryPage.get());
} catch (Exception e) {
// OK to ignore exception when reading dictionaries
return Domain.all(type);
}
int dictionarySize = dictionaryPage.get().getDictionarySize();
DictionaryValueConverter converter = new DictionaryValueConverter(dictionary);
Function<Integer, Object> convertFunction = converter.getConverter(columnDescriptor.getPrimitiveType());
List<Object> values = new ArrayList<>();
for (int i = 0; i < dictionarySize; i++) {
values.add(convertFunction.apply(i));
}
// TODO: when min == max (i.e., singleton ranges, the construction of Domains can be done more efficiently
return getDomain(columnDescriptor, type, values, values, true);
}
use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
the class DeltaPageSourceProvider method getParquetTupleDomain.
public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<DeltaColumnHandle> effectivePredicate) {
if (effectivePredicate.isNone()) {
return TupleDomain.none();
}
ImmutableMap.Builder<ColumnDescriptor, Domain> predicate = ImmutableMap.builder();
for (Map.Entry<DeltaColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
DeltaColumnHandle columnHandle = entry.getKey();
RichColumnDescriptor descriptor;
if (isPushedDownSubfield(columnHandle)) {
Subfield pushedDownSubfield = getPushedDownSubfield(columnHandle);
List<String> subfieldPath = columnPathFromSubfield(pushedDownSubfield);
descriptor = descriptorsByPath.get(subfieldPath);
} else {
descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName()));
}
if (descriptor != null) {
predicate.put(descriptor, entry.getValue());
}
}
return TupleDomain.withColumnDomains(predicate.build());
}
use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
the class TestParquetPredicateUtils method testParquetTupleDomainMap.
@Test
public void testParquetTupleDomainMap() {
HiveColumnHandle columnHandle = new HiveColumnHandle("my_map", HiveType.valueOf("map<int,int>"), parseTypeSignature(StandardTypes.MAP), 0, REGULAR, Optional.empty(), Optional.empty());
MapType mapType = new MapType(INTEGER, INTEGER, methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"), methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"));
TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(mapType)));
MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_map", new GroupType(REPEATED, "map", new PrimitiveType(REQUIRED, INT32, "key"), new PrimitiveType(OPTIONAL, INT32, "value"))));
Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
assertTrue(tupleDomain.getDomains().get().isEmpty());
}
use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
the class TestParquetPredicateUtils method testParquetTupleDomainPrimitiveArray.
@Test
public void testParquetTupleDomainPrimitiveArray() {
HiveColumnHandle columnHandle = new HiveColumnHandle("my_array", HiveType.valueOf("array<int>"), parseTypeSignature(StandardTypes.ARRAY), 0, REGULAR, Optional.empty(), Optional.empty());
TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(new ArrayType(INTEGER))));
MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_array", new GroupType(REPEATED, "bag", new PrimitiveType(OPTIONAL, INT32, "array_element"))));
Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
assertTrue(tupleDomain.getDomains().get().isEmpty());
}
use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
the class TestParquetPredicateUtils method testParquetTupleDomainStruct.
@Test
public void testParquetTupleDomainStruct() {
HiveColumnHandle columnHandle = new HiveColumnHandle("my_struct", HiveType.valueOf("struct<a:int,b:int>"), parseTypeSignature(StandardTypes.ROW), 0, REGULAR, Optional.empty(), Optional.empty());
RowType.Field rowField = new RowType.Field(Optional.of("my_struct"), INTEGER);
RowType rowType = RowType.from(ImmutableList.of(rowField));
TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(rowType)));
MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_struct", new PrimitiveType(OPTIONAL, INT32, "a"), new PrimitiveType(OPTIONAL, INT32, "b")));
Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
assertTrue(tupleDomain.getDomains().get().isEmpty());
}
Aggregations