Use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.
Class ParquetReader, method initializeColumnReaders:
private void initializeColumnReaders()
{
    for (PrimitiveColumnIO columnIO : columns) {
        RichColumnDescriptor column = new RichColumnDescriptor(columnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
        columnReaders[columnIO.getId()] = ColumnReaderFactory.createReader(column, batchReadEnabled);
        if (enableVerification) {
            // When verification is enabled, also create a non-batch reader for the same column
            verificationColumnReaders[columnIO.getId()] = ColumnReaderFactory.createReader(column, false);
        }
    }
}
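As a side note, a RichColumnDescriptor can also be built straight from a Parquet MessageType rather than from column IO. The following is a minimal, self-contained sketch (the schema and class name are hypothetical, not from Presto) that mirrors the constructor call in initializeColumnReaders above.

import com.facebook.presto.parquet.RichColumnDescriptor;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

import java.util.Arrays;

public class RichColumnDescriptorSketch
{
    public static void main(String[] args)
    {
        // Hypothetical single-column schema, used only for illustration
        MessageType schema = MessageTypeParser.parseMessageType(
                "message example { optional int32 my_column; }");

        for (ColumnDescriptor descriptor : schema.getColumns()) {
            // Pair the plain ColumnDescriptor with its PrimitiveType, as the
            // RichColumnDescriptor constructor call in initializeColumnReaders does
            RichColumnDescriptor column = new RichColumnDescriptor(
                    descriptor,
                    schema.getType(descriptor.getPath()).asPrimitiveType());
            System.out.println(Arrays.toString(column.getPath()));
        }
    }
}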
Use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.
Class PredicateUtils, method dictionaryPredicatesMatch:
private static boolean dictionaryPredicatesMatch(Predicate parquetPredicate, BlockMetaData blockMetadata, ParquetDataSource dataSource, Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<ColumnDescriptor> parquetTupleDomain)
{
    for (ColumnChunkMetaData columnMetaData : blockMetadata.getColumns()) {
        RichColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(columnMetaData.getPath().toArray()));
        if (descriptor != null) {
            if (isOnlyDictionaryEncodingPages(columnMetaData) && isColumnPredicate(descriptor, parquetTupleDomain)) {
                byte[] buffer = new byte[toIntExact(columnMetaData.getTotalSize())];
                dataSource.readFully(columnMetaData.getStartingPos(), buffer);
                // Early abort, predicate already filters block so no more dictionaries need be read
                if (!parquetPredicate.matches(new DictionaryDescriptor(descriptor, readDictionaryPage(buffer, columnMetaData.getCodec())))) {
                    return false;
                }
            }
        }
    }
    return true;
}
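The descriptorsByPath map is keyed by the column's path segments, which is why the lookup wraps columnMetaData.getPath().toArray() in Arrays.asList. Below is a minimal sketch (hypothetical helper class; Presto itself builds this map with the getDescriptors helper used in the tests further down) of how such a map could be assembled from a file schema.

import com.facebook.presto.parquet.RichColumnDescriptor;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.MessageType;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DescriptorsByPathSketch
{
    // Key each RichColumnDescriptor by its path segments so that a column chunk's path
    // (Arrays.asList(columnMetaData.getPath().toArray())) can be used directly for lookup
    public static Map<List<String>, RichColumnDescriptor> descriptorsByPath(MessageType schema)
    {
        Map<List<String>, RichColumnDescriptor> descriptors = new HashMap<>();
        for (ColumnDescriptor descriptor : schema.getColumns()) {
            descriptors.put(
                    Arrays.asList(descriptor.getPath()),
                    new RichColumnDescriptor(descriptor, schema.getType(descriptor.getPath()).asPrimitiveType()));
        }
        return descriptors;
    }
}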
Use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.
Class DeltaPageSourceProvider, method getParquetTupleDomain:
public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<DeltaColumnHandle> effectivePredicate)
{
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }
    ImmutableMap.Builder<ColumnDescriptor, Domain> predicate = ImmutableMap.builder();
    for (Map.Entry<DeltaColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        DeltaColumnHandle columnHandle = entry.getKey();
        RichColumnDescriptor descriptor;
        if (isPushedDownSubfield(columnHandle)) {
            // Columns pushed down as subfields are looked up by their full subfield path
            Subfield pushedDownSubfield = getPushedDownSubfield(columnHandle);
            List<String> subfieldPath = columnPathFromSubfield(pushedDownSubfield);
            descriptor = descriptorsByPath.get(subfieldPath);
        }
        else {
            // Top-level columns are looked up by name alone
            descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName()));
        }
        if (descriptor != null) {
            predicate.put(descriptor, entry.getValue());
        }
    }
    return TupleDomain.withColumnDomains(predicate.build());
}
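A quick plumbing sketch, not taken from the Presto test suite: build descriptorsByPath with the same getDescriptors helper the tests below use, and pass an unconstrained TupleDomain.all() as the effective predicate; nothing is pushed down, so the resulting domain map is empty. Static imports for getDescriptors, getParquetTupleDomain, OPTIONAL, INT32, and assertTrue are assumed, as in those tests.

@Test
public void testParquetTupleDomainUnconstrained()
{
    // Sketch only: with TupleDomain.all() there are no per-column constraints to convert
    MessageType fileSchema = new MessageType("delta_schema", new PrimitiveType(OPTIONAL, INT32, "my_column"));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, TupleDomain.all());
    assertTrue(tupleDomain.getDomains().get().isEmpty());
}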
Use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.
Class TestParquetPredicateUtils, method testParquetTupleDomainMap:
@Test
public void testParquetTupleDomainMap()
{
    HiveColumnHandle columnHandle = new HiveColumnHandle("my_map", HiveType.valueOf("map<int,int>"), parseTypeSignature(StandardTypes.MAP), 0, REGULAR, Optional.empty(), Optional.empty());
    MapType mapType = new MapType(
            INTEGER,
            INTEGER,
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"));
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(mapType)));
    MessageType fileSchema = new MessageType("hive_schema",
            new GroupType(OPTIONAL, "my_map",
                    new GroupType(REPEATED, "map",
                            new PrimitiveType(REQUIRED, INT32, "key"),
                            new PrimitiveType(OPTIONAL, INT32, "value"))));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
    assertTrue(tupleDomain.getDomains().get().isEmpty());
}
Use of com.facebook.presto.parquet.RichColumnDescriptor in project presto by prestodb.
Class TestParquetPredicateUtils, method testParquetTupleDomainPrimitiveArray:
@Test
public void testParquetTupleDomainPrimitiveArray()
{
    HiveColumnHandle columnHandle = new HiveColumnHandle("my_array", HiveType.valueOf("array<int>"), parseTypeSignature(StandardTypes.ARRAY), 0, REGULAR, Optional.empty(), Optional.empty());
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(new ArrayType(INTEGER))));
    MessageType fileSchema = new MessageType("hive_schema",
            new GroupType(OPTIONAL, "my_array",
                    new GroupType(REPEATED, "bag",
                            new PrimitiveType(OPTIONAL, INT32, "array_element"))));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
    assertTrue(tupleDomain.getDomains().get().isEmpty());
}
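Both tests exercise complex (map and array) columns, whose predicates are dropped because Parquet statistics exist only for primitive leaves, so the resulting tuple domain is empty. For contrast, here is a hedged sketch of the primitive case, using the same scaffolding, where the domain does survive the conversion; the column name, schema, and method name are illustrative rather than copied from the test class, and assertFalse is assumed to be statically imported alongside assertTrue.

@Test
public void testParquetTupleDomainPrimitiveSketch()
{
    // Sketch only: a predicate on a primitive column maps onto its ColumnDescriptor
    HiveColumnHandle columnHandle = new HiveColumnHandle("my_int", HiveType.valueOf("int"), parseTypeSignature(StandardTypes.INTEGER), 0, REGULAR, Optional.empty(), Optional.empty());
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(INTEGER)));
    MessageType fileSchema = new MessageType("hive_schema", new PrimitiveType(OPTIONAL, INT32, "my_int"));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
    // Exactly one per-column domain is expected, keyed by the descriptor for [my_int]
    assertFalse(tupleDomain.getDomains().get().isEmpty());
}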