use of io.prestosql.parquet.RichColumnDescriptor in project hetu-core by openlookeng.
the class PredicateUtils method getDictionaries.
private static Map<ColumnDescriptor, DictionaryDescriptor> getDictionaries(BlockMetaData blockMetadata, ParquetDataSource dataSource, Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<ColumnDescriptor> parquetTupleDomain) {
ImmutableMap.Builder<ColumnDescriptor, DictionaryDescriptor> dictionaries = ImmutableMap.builder();
for (ColumnChunkMetaData columnMetaData : blockMetadata.getColumns()) {
RichColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(columnMetaData.getPath().toArray()));
if (descriptor != null) {
if (isOnlyDictionaryEncodingPages(columnMetaData) && isColumnPredicate(descriptor, parquetTupleDomain)) {
int totalSize = toIntExact(columnMetaData.getTotalSize());
byte[] buffer = new byte[totalSize];
dataSource.readFully(columnMetaData.getStartingPos(), buffer);
Optional<DictionaryPage> dictionaryPage = readDictionaryPage(buffer, columnMetaData.getCodec());
dictionaries.put(descriptor, new DictionaryDescriptor(descriptor, dictionaryPage));
break;
}
}
}
return dictionaries.build();
}
use of io.prestosql.parquet.RichColumnDescriptor in project hetu-core by openlookeng.
the class ParquetPageSourceFactory method getParquetTupleDomain.
public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<HiveColumnHandle> effectivePredicate) {
if (effectivePredicate.isNone()) {
return TupleDomain.none();
}
ImmutableMap.Builder<ColumnDescriptor, Domain> predicate = ImmutableMap.builder();
for (Entry<HiveColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
HiveColumnHandle columnHandle = entry.getKey();
// skip looking up predicates for complex types as Parquet only stores stats for primitives
if (!columnHandle.getHiveType().getCategory().equals(PRIMITIVE)) {
continue;
}
RichColumnDescriptor descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName()));
if (descriptor != null) {
predicate.put(descriptor, entry.getValue());
}
}
return TupleDomain.withColumnDomains(predicate.build());
}
use of io.prestosql.parquet.RichColumnDescriptor in project hetu-core by openlookeng.
the class TestParquetPredicateUtils method testParquetTupleDomainMap.
@Test
public void testParquetTupleDomainMap() {
HiveColumnHandle columnHandle = new HiveColumnHandle("my_map", HiveType.valueOf("map<int,int>"), parseTypeSignature(StandardTypes.MAP), 0, REGULAR, Optional.empty());
MapType mapType = new MapType(INTEGER, INTEGER, methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"), methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"), methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"), methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"));
TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(mapType)));
MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_map", new GroupType(REPEATED, "map", new PrimitiveType(REQUIRED, INT32, "key"), new PrimitiveType(OPTIONAL, INT32, "value"))));
Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
TupleDomain<ColumnDescriptor> tupleDomain = ParquetPageSourceFactory.getParquetTupleDomain(descriptorsByPath, domain);
assertTrue(tupleDomain.getDomains().get().isEmpty());
}
use of io.prestosql.parquet.RichColumnDescriptor in project hetu-core by openlookeng.
the class TestParquetPredicateUtils method testParquetTupleDomainStructArray.
@Test
public void testParquetTupleDomainStructArray() {
HiveColumnHandle columnHandle = new HiveColumnHandle("my_array_struct", HiveType.valueOf("array<struct<a:int>>"), parseTypeSignature(StandardTypes.ARRAY), 0, REGULAR, Optional.empty());
RowType.Field rowField = new RowType.Field(Optional.of("a"), INTEGER);
RowType rowType = RowType.from(ImmutableList.of(rowField));
TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(new ArrayType(rowType))));
MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_array_struct", new GroupType(REPEATED, "bag", new GroupType(OPTIONAL, "array_element", new PrimitiveType(OPTIONAL, INT32, "a")))));
Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
TupleDomain<ColumnDescriptor> tupleDomain = ParquetPageSourceFactory.getParquetTupleDomain(descriptorsByPath, domain);
assertTrue(tupleDomain.getDomains().get().isEmpty());
}
use of io.prestosql.parquet.RichColumnDescriptor in project hetu-core by openlookeng.
the class TestParquetPredicateUtils method testParquetTupleDomainPrimitive.
@Test
public void testParquetTupleDomainPrimitive() {
HiveColumnHandle columnHandle = new HiveColumnHandle("my_primitive", HiveType.valueOf("bigint"), parseTypeSignature(StandardTypes.BIGINT), 0, REGULAR, Optional.empty());
Domain singleValueDomain = Domain.singleValue(BIGINT, 123L);
TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, singleValueDomain));
MessageType fileSchema = new MessageType("hive_schema", new PrimitiveType(OPTIONAL, INT64, "my_primitive"));
Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
TupleDomain<ColumnDescriptor> tupleDomain = ParquetPageSourceFactory.getParquetTupleDomain(descriptorsByPath, domain);
assertEquals(tupleDomain.getDomains().get().size(), 1);
ColumnDescriptor descriptor = tupleDomain.getDomains().get().keySet().iterator().next();
assertEquals(descriptor.getPath().length, 1);
assertEquals(descriptor.getPath()[0], "my_primitive");
Domain predicateDomain = Iterables.getOnlyElement(tupleDomain.getDomains().get().values());
assertEquals(predicateDomain, singleValueDomain);
}
Aggregations