Use of io.prestosql.spi.connector.Constraint in project boostkit-bigdata by kunpengcompute.
The class HivePartitionManager, method getOrLoadPartitions:
public List<HivePartition> getOrLoadPartitions(ConnectorSession session, SemiTransactionalHiveMetastore metastore, HiveIdentity identity, HiveTableHandle tableHandle) {
    SchemaTableName tableName = tableHandle.getSchemaTableName();
    Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    // Reuse partitions already attached to the handle; only enumerate them from the
    // metastore when the handle does not carry a partition list yet
    return tableHandle.getPartitions().orElseGet(() ->
            getPartitionsAsList(getPartitions(metastore, identity, tableHandle, new Constraint(tableHandle.getEnforcedConstraint()), table)));
}
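The detail worth noticing is the Optional.orElseGet at the end: when an earlier planning step has already attached a partition list to the handle, the metastore is never consulted again. A minimal, self-contained sketch of that memoization pattern, assuming nothing beyond the JDK (PartitionHolder and loadFromMetastore are hypothetical stand-ins, not connector API; Java 9+ for List.of):

import java.util.List;
import java.util.Optional;

public class PartitionHolder {
    // Mirrors HiveTableHandle.getPartitions(): empty until a planner rule resolves partitions
    private Optional<List<String>> partitions = Optional.empty();

    public List<String> getOrLoadPartitions() {
        // orElseGet defers the expensive metastore round trip until it is actually needed
        return partitions.orElseGet(this::loadFromMetastore);
    }

    private List<String> loadFromMetastore() {
        // Stand-in for getPartitionsAsList(getPartitions(...))
        return List.of("ds=2021-01-01", "ds=2021-01-02");
    }
}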
Use of io.prestosql.spi.connector.Constraint in project boostkit-bigdata by kunpengcompute.
The class HivePartitionManager, method getPartitions:
public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, HiveIdentity identity, ConnectorTableHandle tableHandle, Constraint constraint, Table table) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicate = constraint.getSummary().intersect(hiveTableHandle.getEnforcedConstraint());
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Optional<HiveBucketHandle> hiveBucketHandle = hiveTableHandle.getBucketHandle();
    List<HiveColumnHandle> partitionColumns = hiveTableHandle.getPartitionColumns();
    if (effectivePredicate.isNone()) {
        // A contradictory predicate matches nothing: short-circuit with an empty partition list
        return new HivePartitionResult(partitionColumns, ImmutableList.of(), none(), none(), none(), hiveBucketHandle, Optional.empty());
    }
    Optional<HiveBucketing.HiveBucketFilter> bucketFilter = HiveBucketing.getHiveBucketFilter(table, effectivePredicate);
    TupleDomain<HiveColumnHandle> compactEffectivePredicate = toCompactTupleDomain(effectivePredicate, domainCompactionThreshold);
    if (partitionColumns.isEmpty()) {
        // Unpartitioned table: a single synthetic partition stands for the whole table
        return new HivePartitionResult(partitionColumns, ImmutableList.of(new HivePartition(tableName)), compactEffectivePredicate, effectivePredicate, all(), hiveBucketHandle, bucketFilter);
    }
    List<Type> partitionTypes = partitionColumns.stream()
            .map(column -> typeManager.getType(column.getTypeSignature()))
            .collect(toList());
    Iterable<HivePartition> partitionsIterable;
    Predicate<Map<ColumnHandle, NullableValue>> predicate = constraint.predicate().orElse(value -> true);
    if (hiveTableHandle.getPartitions().isPresent()) {
        // Partitions were already resolved: re-filter them against the current constraint
        partitionsIterable = hiveTableHandle.getPartitions().get().stream()
                .filter(partition -> partitionMatches(partitionColumns, effectivePredicate, predicate, partition))
                .collect(toImmutableList());
    } else {
        List<String> partitionNames = getFilteredPartitionNames(metastore, identity, tableName, partitionColumns, effectivePredicate, table);
        partitionsIterable = () -> partitionNames.stream()
                .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionTypes, effectivePredicate, predicate))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .iterator();
    }
    // All partition key domains will be fully evaluated, so we don't need to include those
    TupleDomain<ColumnHandle> remainingTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), not(Predicates.in(partitionColumns))));
    TupleDomain<ColumnHandle> enforcedTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), Predicates.in(partitionColumns)));
    return new HivePartitionResult(partitionColumns, partitionsIterable, compactEffectivePredicate, remainingTupleDomain, enforcedTupleDomain, hiveBucketHandle, bucketFilter);
}
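The last three lines are the crux: the effective predicate is split into the portion partition pruning fully enforces (domains over partition columns) and the remainder the engine must still evaluate. A small standalone sketch of that split with the same Guava utilities, assuming Guava on the classpath; the column names and domain strings are invented for illustration:

import static com.google.common.base.Predicates.in;
import static com.google.common.base.Predicates.not;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;

public class PredicateSplitSketch {
    public static void main(String[] args) {
        // Stand-ins for TupleDomain domains keyed by column handle
        Map<String, String> domains = ImmutableMap.of(
                "ds", "ds = '2021-01-01'",   // partition column
                "user_id", "user_id > 100"); // regular column
        List<String> partitionColumns = List.of("ds");

        // Enforced by pruning: only the partition-column domains
        Map<String, String> enforced = Maps.filterKeys(domains, in(partitionColumns));
        // Remaining: what the filter operator must still check
        Map<String, String> remaining = Maps.filterKeys(domains, not(in(partitionColumns)));

        System.out.println("enforced = " + enforced);   // {ds=ds = '2021-01-01'}
        System.out.println("remaining = " + remaining); // {user_id=user_id > 100}
    }
}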
Use of io.prestosql.spi.connector.Constraint in project boostkit-bigdata by kunpengcompute.
The class HiveFilterPushdown, method evaluateFilterBenefit:
private static boolean evaluateFilterBenefit(ConnectorTableHandle tableHandle, Map<String, ColumnHandle> columnHandlesMap, HiveMetadata metadata, FilterStatsCalculatorService filterCalculatorService, RowExpression predicate, Constraint constraint, ConnectorSession session, Map<String, Type> typesMap) {
    TableStatistics statistics = metadata.getTableStatistics(session, tableHandle, constraint, true);
    // Tables below the configured row threshold (or with unknown row counts) are not worth offloading
    if (statistics.getRowCount().isUnknown() || statistics.getRowCount().getValue() < HiveSessionProperties.getMinOffloadRowNumber(session)) {
        log.info("Filter:Table %s row number[%d], expect min row number[%d], predicate[%s].", tableHandle.getTableName(), (long) statistics.getRowCount().getValue(), HiveSessionProperties.getMinOffloadRowNumber(session), predicate.toString());
        return false;
    }
    Set<String> predicateVariables = HivePushdownUtil.extractAll(predicate).stream()
            .map(VariableReferenceExpression::getName)
            .collect(Collectors.toSet());
    Map<ColumnHandle, String> allColumns = columnHandlesMap.entrySet().stream()
            .filter(entry -> predicateVariables.contains(entry.getKey()))
            .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
    Map<String, Type> allColumnTypes = allColumns.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getValue, entry -> metadata.getColumnMetadata(session, tableHandle, entry.getKey()).getType()));
    Map<Symbol, Type> symbolsMap = typesMap.entrySet().stream()
            .collect(Collectors.toMap(entry -> new Symbol(entry.getKey()), Map.Entry::getValue));
    allColumnTypes.forEach((name, type) -> {
        // only add predicate columns that are not already present in the symbol map
        if (!symbolsMap.containsKey(new Symbol(name))) {
            symbolsMap.put(new Symbol(name), type);
        }
    });
    TableStatistics filterStatistics = filterCalculatorService.filterStats(statistics, predicate, session, allColumns, allColumnTypes, symbolsMap, formSymbolsLayout(allColumns));
    Estimate filteredRowCount = filterStatistics.getRowCount().isUnknown() ? statistics.getRowCount() : filterStatistics.getRowCount();
    double filterFactor = filteredRowCount.getValue() / statistics.getRowCount().getValue();
    if (filterFactor <= HiveSessionProperties.getFilterOffloadFactor(session)) {
        log.info("Offloading: table %s, size[%d], predicate[%s], filter factor[%.2f%%].", tableHandle.getTableName(), (long) statistics.getRowCount().getValue(), predicate.toString(), filterFactor * 100);
        return true;
    }
    log.info("No need to offload: table %s, size[%d], predicate[%s], filter factor[%.2f%%].", tableHandle.getTableName(), (long) statistics.getRowCount().getValue(), predicate.toString(), filterFactor * 100);
    return false;
}
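Stripped of logging and statistics plumbing, the decision is a selectivity test: estimated surviving rows over total rows, compared against a session threshold. A condensed standalone sketch of that check; the constants stand in for the getFilterOffloadFactor and getMinOffloadRowNumber session properties, and all numbers are invented:

public class OffloadDecisionSketch {
    // Hypothetical defaults standing in for the session properties
    private static final double FILTER_OFFLOAD_FACTOR = 0.25;
    private static final long MIN_OFFLOAD_ROW_NUMBER = 500;

    static boolean shouldOffload(long totalRows, long filteredRows) {
        // Small tables are not worth offloading regardless of selectivity
        if (totalRows < MIN_OFFLOAD_ROW_NUMBER) {
            return false;
        }
        // Offload only when the predicate filters aggressively enough
        double filterFactor = (double) filteredRows / totalRows;
        return filterFactor <= FILTER_OFFLOAD_FACTOR;
    }

    public static void main(String[] args) {
        System.out.println(shouldOffload(1_000_000, 50_000));  // true: only 5% of rows survive
        System.out.println(shouldOffload(1_000_000, 900_000)); // false: predicate barely filters
        System.out.println(shouldOffload(100, 1));             // false: table too small
    }
}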
Use of io.prestosql.spi.connector.Constraint in project hetu-core by openlookeng.
The class TestHBaseConnector, method testApplyFilter:
/**
 * testApplyFilter
 */
@Test
public void testApplyFilter() {
    Constraint constraint = new Constraint(TestUtils.createTupleDomain(5));
    Optional<ConstraintApplicationResult<ConnectorTableHandle>> result = hcm.applyFilter(session, TestUtils.createHBaseTableHandle(), constraint);
    assertEquals(true, result.isPresent());
}
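The test stops at asserting presence. For context, an engine-side caller typically unpacks the two halves of the result as below; a sketch assuming the io.prestosql SPI is on the classpath (the class and method names here are illustrative, not from the test):

import io.prestosql.spi.connector.ColumnHandle;
import io.prestosql.spi.connector.ConnectorTableHandle;
import io.prestosql.spi.connector.ConstraintApplicationResult;
import io.prestosql.spi.predicate.TupleDomain;
import java.util.Optional;

public class ApplyFilterUsageSketch {
    static void consume(Optional<ConstraintApplicationResult<ConnectorTableHandle>> result) {
        if (!result.isPresent()) {
            // The connector could not narrow the scan; the plan is left unchanged
            return;
        }
        // The narrowed handle replaces the old one in the plan...
        ConnectorTableHandle newHandle = result.get().getHandle();
        // ...and the remaining filter is what the engine must still apply on top
        TupleDomain<ColumnHandle> remaining = result.get().getRemainingFilter();
    }
}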
Use of io.prestosql.spi.connector.Constraint in project hetu-core by openlookeng.
The class HiveMetadata, method applyFilter:
@Override
public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint constraint, List<Constraint> disjuctConstaints, Set<ColumnHandle> allColumnHandles, boolean pushPartitionsOnly) {
    HiveIdentity identity = new HiveIdentity(session);
    HiveTableHandle handle = (HiveTableHandle) tableHandle;
    checkArgument(!handle.getAnalyzePartitionValues().isPresent() || constraint.getSummary().isAll(), "Analyze should not have a constraint");
    SchemaTableName tableName = handle.getSchemaTableName();
    Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    HivePartitionResult partitionResult = partitionManager.getPartitions(metastore, identity, handle, constraint, table);
    HiveTableHandle newHandle = partitionManager.applyPartitionResult(handle, partitionResult);
    // the goal here is to push down all the constraints/predicates to HivePageSourceProvider
    // in case some pre-filtering can be done using the heuristic index.
    // however, during scheduling we can't be sure a column will have a heuristic index,
    // so filtering should still be done using the filter operator;
    // hence the unenforced constraints below include all constraints (minus partitions)
    ImmutableMap.Builder<HiveColumnHandle, Domain> pushedDown = ImmutableMap.builder();
    pushedDown.putAll(partitionResult.getUnenforcedConstraint().getDomains().get().entrySet().stream()
            .collect(toMap(e -> (HiveColumnHandle) e.getKey(), e -> e.getValue())));
    TupleDomain<HiveColumnHandle> newEffectivePredicate = newHandle.getCompactEffectivePredicate()
            .intersect(handle.getCompactEffectivePredicate())
            .intersect(withColumnDomains(pushedDown.build()));
    ImmutableList.Builder<TupleDomain<HiveColumnHandle>> builder = ImmutableList.builder();
    disjuctConstaints.forEach(c -> {
        TupleDomain<HiveColumnHandle> newSubDomain = withColumnDomains(c.getSummary().getDomains().get().entrySet().stream()
                .collect(toMap(e -> (HiveColumnHandle) e.getKey(), e -> e.getValue())))
                .subtract(newEffectivePredicate);
        if (!newSubDomain.isNone()) {
            builder.add(newSubDomain);
        }
    });
    // Get the list of all columns involved in the predicate
    Set<String> predicateColumnNames = new HashSet<>();
    newEffectivePredicate.getDomains().get().keySet().stream()
            .map(HiveColumnHandle::getColumnName)
            .forEach(predicateColumnNames::add);
    List<TupleDomain<HiveColumnHandle>> newEffectivePredicates = null;
    boolean isSuitableToPush = false;
    if (HiveSessionProperties.isOrcPredicatePushdownEnabled(session)) {
        isSuitableToPush = checkIfSuitableToPush(allColumnHandles, tableHandle, session);
    }
    if (isSuitableToPush && HiveSessionProperties.isOrcDisjunctPredicatePushdownEnabled(session)) {
        newEffectivePredicates = builder.build();
        newEffectivePredicates.forEach(nfp -> nfp.getDomains().get().keySet().stream()
                .map(HiveColumnHandle::getColumnName)
                .forEach(predicateColumnNames::add));
    }
    if (isSuitableToPush && partitionResult.getEnforcedConstraint().equals(newEffectivePredicate) && (newEffectivePredicates == null || newEffectivePredicates.isEmpty())) {
        isSuitableToPush = false;
    }
    // Get the column handles and map predicate columns to hive column handles
    Map<String, ColumnHandle> columnHandles = getColumnHandles(table);
    Map<String, HiveColumnHandle> predicateColumns = predicateColumnNames.stream()
            .map(columnHandles::get)
            .map(HiveColumnHandle.class::cast)
            .filter(HiveColumnHandle::isRegular)
            .collect(toImmutableMap(HiveColumnHandle::getName, identity()));
    newHandle = new HiveTableHandle(newHandle.getSchemaName(), newHandle.getTableName(), newHandle.getTableParameters(), newHandle.getPartitionColumns(), newHandle.getPartitions(), newEffectivePredicate, newHandle.getEnforcedConstraint(), newHandle.getBucketHandle(), newHandle.getBucketFilter(), newHandle.getAnalyzePartitionValues(), predicateColumns, Optional.ofNullable(newEffectivePredicates), isSuitableToPush);
    if (pushPartitionsOnly && handle.getPartitions().equals(newHandle.getPartitions()) && handle.getCompactEffectivePredicate().equals(newHandle.getCompactEffectivePredicate()) && handle.getBucketFilter().equals(newHandle.getBucketFilter())) {
        // Nothing was narrowed; returning empty stops the optimizer from re-applying the rule
        return Optional.empty();
    }
    if (!pushPartitionsOnly && isSuitableToPush) {
        // Everything is pushed into the ORC reader; no residual filter remains
        return Optional.of(new ConstraintApplicationResult<>(newHandle, TupleDomain.all()));
    }
    // note here that all unenforced constraints will still be applied using the filter operator
    return Optional.of(new ConstraintApplicationResult<>(newHandle, partitionResult.getUnenforcedConstraint()));
}
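The three return paths encode the pushdown contract: Optional.empty() means "no progress, stop iterating the rule", a result carrying TupleDomain.all() means everything was pushed into the scan, and otherwise the unenforced constraint travels back for the engine's filter operator. A condensed sketch of just that contract, assuming the io.prestosql SPI on the classpath (the Result class and method shape are illustrative, not connector API):

import io.prestosql.spi.connector.ColumnHandle;
import io.prestosql.spi.predicate.TupleDomain;
import java.util.Optional;

public class PushdownContractSketch {
    // Hypothetical pair of a new handle plus whatever the engine must still filter
    static final class Result {
        final String newHandle;
        final TupleDomain<ColumnHandle> remainingFilter;

        Result(String newHandle, TupleDomain<ColumnHandle> remainingFilter) {
            this.newHandle = newHandle;
            this.remainingFilter = remainingFilter;
        }
    }

    static Optional<Result> applyFilter(String oldHandle, String newHandle, boolean fullyPushed, TupleDomain<ColumnHandle> unenforced) {
        if (oldHandle.equals(newHandle)) {
            // Nothing was narrowed: empty stops the optimizer from looping on this rule
            return Optional.empty();
        }
        if (fullyPushed) {
            // The connector enforces everything; no residual filter remains
            return Optional.of(new Result(newHandle, TupleDomain.all()));
        }
        // Partial pushdown: the engine re-applies the unenforced part
        return Optional.of(new Result(newHandle, unenforced));
    }
}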