use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
the class TestDynamicFilterSourceOperator method testMultipleColumnsCollectMinMaxRangeWhenTooManyBytes.
@Test
public void testMultipleColumnsCollectMinMaxRangeWhenTooManyBytes()
{
    DataSize maxSize = new DataSize(10, KILOBYTE);
    long maxByteSize = maxSize.toBytes();
    String largeTextA = repeat("A", (int) (maxByteSize / 2) + 1);
    String largeTextB = repeat("B", (int) (maxByteSize / 2) + 1);
    Page largePage = new Page(createStringsBlock(largeTextA), createStringsBlock(largeTextB));
    List<TupleDomain<String>> expectedTupleDomains = ImmutableList.of(TupleDomain.withColumnDomains(ImmutableMap.of(
            "0", Domain.create(ValueSet.ofRanges(range(VARCHAR, utf8Slice(largeTextA), true, utf8Slice(largeTextA), true)), false),
            "1", Domain.create(ValueSet.ofRanges(range(VARCHAR, utf8Slice(largeTextB), true, utf8Slice(largeTextB), true)), false))));
    assertDynamicFilters(100, maxSize, 100, ImmutableList.of(VARCHAR, VARCHAR), ImmutableList.of(largePage), expectedTupleDomains);
}
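The expected tuple domains above collapse each oversized string column into a single inclusive min/max range rather than a set of distinct values. A minimal, standalone sketch of building such a range-based domain with the same com.facebook.presto.common.predicate API (the column name "col" and the bounds are illustrative, not from the test):

import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.predicate.ValueSet;
import com.google.common.collect.ImmutableMap;

import static com.facebook.presto.common.predicate.Range.range;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static io.airlift.slice.Slices.utf8Slice;

// Inclusive VARCHAR range ["apple", "banana"]; the trailing 'false' means the
// domain does not admit NULL values.
Domain minMax = Domain.create(
        ValueSet.ofRanges(range(VARCHAR, utf8Slice("apple"), true, utf8Slice("banana"), true)),
        false);
TupleDomain<String> filter = TupleDomain.withColumnDomains(ImmutableMap.of("col", minMax));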
use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
the class DeltaExpressionUtils method iterateWithPartitionPruning.
/**
 * Utility method that takes an iterator of {@link AddFile}s and a predicate and returns an iterator of {@link AddFile}s
 * that may satisfy the predicate; files whose partition values make the partition predicate evaluate to a
 * deterministic NO are pruned.
 */
public static CloseableIterator<AddFile> iterateWithPartitionPruning(
        CloseableIterator<AddFile> inputIterator,
        TupleDomain<DeltaColumnHandle> predicate,
        TypeManager typeManager)
{
    TupleDomain<String> partitionPredicate = extractPartitionColumnsPredicate(predicate);
    if (partitionPredicate.isAll()) {
        // there is no partition filter, return the input iterator as is
        return inputIterator;
    }
    if (partitionPredicate.isNone()) {
        // nothing passes the partition predicate, return empty iterator
        return new CloseableIterator<AddFile>()
        {
            @Override
            public boolean hasNext()
            {
                return false;
            }

            @Override
            public AddFile next()
            {
                throw new NoSuchElementException();
            }

            @Override
            public void close() throws IOException
            {
                inputIterator.close();
            }
        };
    }
    List<DeltaColumnHandle> partitionColumns = predicate.getColumnDomains().get().stream()
            .filter(entry -> entry.getColumn().getColumnType() == PARTITION)
            .map(entry -> entry.getColumn())
            .collect(Collectors.toList());
    return new CloseableIterator<AddFile>()
    {
        private AddFile nextItem;

        @Override
        public boolean hasNext()
        {
            if (nextItem != null) {
                return true;
            }
            while (inputIterator.hasNext()) {
                AddFile nextFile = inputIterator.next();
                if (evaluatePartitionPredicate(partitionPredicate, partitionColumns, typeManager, nextFile)) {
                    nextItem = nextFile;
                    break;
                }
            }
            return nextItem != null;
        }

        @Override
        public AddFile next()
        {
            if (!hasNext()) {
                throw new NoSuchElementException("there are no more files");
            }
            AddFile toReturn = nextItem;
            nextItem = null;
            return toReturn;
        }

        @Override
        public void close() throws IOException
        {
            inputIterator.close();
        }
    };
}
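The extractPartitionColumnsPredicate helper is not shown on this page. A hedged sketch of the kind of re-keying it would have to perform, keeping only PARTITION columns and indexing the resulting TupleDomain by column name; the method name is hypothetical, and only getDomains and withColumnDomains, which appear in the snippets above, are relied on:

import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.TupleDomain;
import java.util.Map;

import static com.google.common.collect.ImmutableMap.toImmutableMap;

// Hypothetical stand-in for extractPartitionColumnsPredicate: keep only the
// PARTITION columns of the predicate and re-key them by column name.
private static TupleDomain<String> partitionColumnsOnly(TupleDomain<DeltaColumnHandle> predicate)
{
    if (predicate.isNone()) {
        return TupleDomain.none();
    }
    Map<String, Domain> partitionDomains = predicate.getDomains().get().entrySet().stream()
            .filter(entry -> entry.getKey().getColumnType() == PARTITION)
            .collect(toImmutableMap(entry -> entry.getKey().getName(), Map.Entry::getValue));
    return TupleDomain.withColumnDomains(partitionDomains);
}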
use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
the class DeltaPageSourceProvider method getParquetTupleDomain.
public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(
        Map<List<String>, RichColumnDescriptor> descriptorsByPath,
        TupleDomain<DeltaColumnHandle> effectivePredicate)
{
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }
    ImmutableMap.Builder<ColumnDescriptor, Domain> predicate = ImmutableMap.builder();
    for (Map.Entry<DeltaColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        DeltaColumnHandle columnHandle = entry.getKey();
        RichColumnDescriptor descriptor;
        if (isPushedDownSubfield(columnHandle)) {
            Subfield pushedDownSubfield = getPushedDownSubfield(columnHandle);
            List<String> subfieldPath = columnPathFromSubfield(pushedDownSubfield);
            descriptor = descriptorsByPath.get(subfieldPath);
        }
        else {
            descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName()));
        }
        if (descriptor != null) {
            predicate.put(descriptor, entry.getValue());
        }
    }
    return TupleDomain.withColumnDomains(predicate.build());
}
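The isNone() short-circuit at the top matters because TupleDomain.none() carries no domain map at all (getDomains() returns an empty Optional). A small standalone sketch of the algebra this relies on, with an illustrative BIGINT column:

import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.TupleDomain;
import com.google.common.collect.ImmutableMap;

import static com.facebook.presto.common.type.BigintType.BIGINT;

TupleDomain<String> some = TupleDomain.withColumnDomains(
        ImmutableMap.of("x", Domain.singleValue(BIGINT, 42L)));

// all() is the identity of intersect; none() is absorbing
System.out.println(some.intersect(TupleDomain.all()).equals(some)); // true
System.out.println(some.intersect(TupleDomain.none()).isNone());    // true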
use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
the class HiveMetadata method getMaterializedViewStatus.
@Override
public MaterializedViewStatus getMaterializedViewStatus(ConnectorSession session, SchemaTableName materializedViewName)
{
    MetastoreContext metastoreContext = getMetastoreContext(session);
    ConnectorMaterializedViewDefinition viewDefinition = getMaterializedView(session, materializedViewName)
            .orElseThrow(() -> new MaterializedViewNotFoundException(materializedViewName));
    List<Table> baseTables = viewDefinition.getBaseTables().stream()
            .map(baseTableName -> metastore.getTable(metastoreContext, baseTableName.getSchemaName(), baseTableName.getTableName())
                    .orElseThrow(() -> new TableNotFoundException(baseTableName)))
            .collect(toImmutableList());
    baseTables.forEach(table -> checkState(
            table.getTableType().equals(MANAGED_TABLE),
            format("base table %s is not a managed table", table.getTableName())));
    Table materializedViewTable = metastore.getTable(metastoreContext, materializedViewName.getSchemaName(), materializedViewName.getTableName())
            .orElseThrow(() -> new MaterializedViewNotFoundException(materializedViewName));
    checkState(
            materializedViewTable.getTableType().equals(MATERIALIZED_VIEW),
            format("materialized view table %s is not a materialized view", materializedViewTable.getTableName()));
    validateMaterializedViewPartitionColumns(metastore, metastoreContext, materializedViewTable, viewDefinition);
    Map<String, Map<SchemaTableName, String>> directColumnMappings = viewDefinition.getDirectColumnMappingsAsMap();
    Map<SchemaTableName, Map<String, String>> viewToBasePartitionMap = getViewToBasePartitionMap(materializedViewTable, baseTables, directColumnMappings);
    MaterializedDataPredicates materializedDataPredicates = getMaterializedDataPredicates(metastore, metastoreContext, typeManager, materializedViewTable, timeZone);
    if (materializedDataPredicates.getPredicateDisjuncts().isEmpty()) {
        return new MaterializedViewStatus(NOT_MATERIALIZED);
    }
    // Partitions to keep track of for materialized view freshness are the partitions of every base table
    // that are not available/updated to the materialized view yet.
    Map<SchemaTableName, MaterializedDataPredicates> partitionsFromBaseTables = baseTables.stream()
            .collect(toImmutableMap(
                    baseTable -> new SchemaTableName(baseTable.getDatabaseName(), baseTable.getTableName()),
                    baseTable -> {
                        MaterializedDataPredicates baseTableMaterializedPredicates = getMaterializedDataPredicates(metastore, metastoreContext, typeManager, baseTable, timeZone);
                        SchemaTableName schemaTableName = new SchemaTableName(baseTable.getDatabaseName(), baseTable.getTableName());
                        Map<String, String> viewToBaseIndirectMappedColumns = viewToBaseTableOnOuterJoinSideIndirectMappedPartitions(viewDefinition, baseTable)
                                .orElse(ImmutableMap.of());
                        return differenceDataPredicates(
                                baseTableMaterializedPredicates,
                                materializedDataPredicates,
                                viewToBasePartitionMap.getOrDefault(schemaTableName, ImmutableMap.of()),
                                viewToBaseIndirectMappedColumns);
                    }));
    for (MaterializedDataPredicates dataPredicates : partitionsFromBaseTables.values()) {
        if (!dataPredicates.getPredicateDisjuncts().isEmpty()) {
            if (dataPredicates.getPredicateDisjuncts().stream()
                    .mapToInt(tupleDomain -> tupleDomain.getDomains().isPresent() ? tupleDomain.getDomains().get().size() : 0)
                    .sum() > HiveSessionProperties.getMaterializedViewMissingPartitionsThreshold(session)) {
                return new MaterializedViewStatus(TOO_MANY_PARTITIONS_MISSING, partitionsFromBaseTables);
            }
            return new MaterializedViewStatus(PARTIALLY_MATERIALIZED, partitionsFromBaseTables);
        }
    }
    return new MaterializedViewStatus(FULLY_MATERIALIZED);
}
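The threshold check near the end sums the number of column domains across all predicate disjuncts of a base table, where each disjunct is a TupleDomain describing one missing partition. A hedged, standalone restatement of just that counting step (the method name and the String column key are illustrative):

import com.facebook.presto.common.predicate.TupleDomain;
import java.util.List;
import java.util.Map;

// Compares the total number of column domains across disjuncts against the
// session-level missing-partitions threshold.
static boolean tooManyPartitionsMissing(List<TupleDomain<String>> predicateDisjuncts, int threshold)
{
    int missing = predicateDisjuncts.stream()
            .mapToInt(tupleDomain -> tupleDomain.getDomains().map(Map::size).orElse(0))
            .sum();
    return missing > threshold;
}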
use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
the class HivePageSourceProvider method getPageSourceFromCursorProvider.
private static Optional<ConnectorPageSource> getPageSourceFromCursorProvider(
        Set<HiveRecordCursorProvider> cursorProviders,
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        long fileSize,
        Storage storage,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        List<HiveColumnHandle> hiveColumns,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager,
        SchemaTableName tableName,
        List<HiveColumnHandle> partitionKeyColumnHandles,
        List<Column> tableDataColumns,
        Map<String, String> tableParameters,
        int partitionDataColumnCount,
        TableToPartitionMapping tableToPartitionMapping,
        boolean s3SelectPushdownEnabled,
        RowExpression remainingPredicate,
        boolean isPushdownFilterEnabled,
        RowExpressionService rowExpressionService,
        Map<String, String> customSplitInfo,
        List<HiveColumnHandle> allColumns,
        List<ColumnMapping> columnMappings,
        Set<Integer> outputIndices,
        List<ColumnMapping> regularAndInterimColumnMappings,
        Optional<BucketAdaptation> bucketAdaptation)
{
    if (!hiveColumns.isEmpty() && hiveColumns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. " +
                "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat() +
                ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }
    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);
        List<Column> partitionDataColumns = reconstructPartitionSchema(
                tableDataColumns,
                partitionDataColumnCount,
                tableToPartitionMapping.getPartitionSchemaDifference(),
                tableToPartitionMapping.getTableToPartitionColumns());
        Properties schema = getHiveSchema(
                storage,
                partitionDataColumns,
                tableDataColumns,
                tableParameters,
                tableName.getSchemaName(),
                tableName.getTableName(),
                partitionKeyColumnHandles.stream().map(column -> column.getName()).collect(toImmutableList()),
                partitionKeyColumnHandles.stream().map(column -> column.getHiveType()).collect(toImmutableList()));
        Optional<RecordCursor> cursor = provider.createRecordCursor(
                configuration,
                session,
                path,
                start,
                length,
                fileSize,
                schema,
                toColumnHandles(regularAndInterimColumnMappings, doCoercion),
                effectivePredicate,
                hiveStorageTimeZone,
                typeManager,
                s3SelectPushdownEnabled,
                customSplitInfo);
        if (cursor.isPresent()) {
            RecordCursor delegate = cursor.get();
            if (bucketAdaptation.isPresent()) {
                delegate = new HiveBucketAdapterRecordCursor(
                        bucketAdaptation.get().getBucketColumnIndices(),
                        bucketAdaptation.get().getBucketColumnHiveTypes(),
                        bucketAdaptation.get().getTableBucketCount(),
                        bucketAdaptation.get().getPartitionBucketCount(),
                        bucketAdaptation.get().getBucketToKeep(),
                        typeManager,
                        delegate);
            }
            // Need to wrap RcText and RcBinary into a wrapper, which will do the coercion for mismatched columns
            if (doCoercion) {
                delegate = new HiveCoercionRecordCursor(regularAndInterimColumnMappings, typeManager, delegate);
            }
            HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, hiveStorageTimeZone, typeManager, delegate);
            List<Type> columnTypes = allColumns.stream().map(input -> typeManager.getType(input.getTypeSignature())).collect(toList());
            RecordPageSource recordPageSource = new RecordPageSource(columnTypes, hiveRecordCursor);
            if (isPushdownFilterEnabled) {
                return Optional.of(new FilteringPageSource(
                        columnMappings,
                        effectivePredicate,
                        remainingPredicate,
                        typeManager,
                        rowExpressionService,
                        session,
                        outputIndices,
                        recordPageSource));
            }
            return Optional.of(recordPageSource);
        }
    }
    return Optional.empty();
}
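Both the record cursor provider and the FilteringPageSource receive the same effectivePredicate. A hedged sketch of what row-level evaluation against a TupleDomain can look like, assuming Domain.includesNullableValue is used for the per-column test; the valueForColumn accessor is hypothetical, and values are assumed to be in Presto's native representation (e.g. Slice for VARCHAR, Long for BIGINT):

import com.facebook.presto.common.predicate.TupleDomain;
import java.util.function.Function;

// Returns true if every column domain in the predicate accepts the row's value
// for that column; an all() predicate has an empty domain map and accepts everything.
static <C> boolean rowMatches(TupleDomain<C> predicate, Function<C, Object> valueForColumn)
{
    if (predicate.isNone()) {
        return false;
    }
    return predicate.getDomains().get().entrySet().stream()
            .allMatch(entry -> entry.getValue().includesNullableValue(valueForColumn.apply(entry.getKey())));
}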