Use of io.prestosql.spi.plan.FilterStatsCalculatorService in project boostkit-bigdata by kunpengcompute.
In class HiveFilterPushdown, the method evaluateFilterBenefit:
/**
 * Decides whether offloading {@code predicate} to the data source is beneficial.
 * The table must exceed the configured minimum row count, and the predicate must be
 * selective enough: the estimated filter factor (filtered rows / total rows) has to
 * fall at or below the session's filter-offload threshold.
 *
 * @return {@code true} when the filter should be offloaded, {@code false} otherwise
 *         (also when the table row count is unknown or below the minimum)
 */
private static boolean evaluateFilterBenefit(ConnectorTableHandle tableHandle, Map<String, ColumnHandle> columnHandlesMap, HiveMetadata metadata, FilterStatsCalculatorService filterCalculatorService, RowExpression predicate, Constraint constraint, ConnectorSession session, Map<String, Type> typesMap) {
    TableStatistics statistics = metadata.getTableStatistics(session, tableHandle, constraint, true);
    // Too few rows (or unknown): offloading cannot pay off.
    if (statistics.getRowCount().isUnknown() || statistics.getRowCount().getValue() < HiveSessionProperties.getMinOffloadRowNumber(session)) {
        log.info("Filter:Table %s row number[%d], expect min row number[%d], predicate[%s].", tableHandle.getTableName(), (long) statistics.getRowCount().getValue(), HiveSessionProperties.getMinOffloadRowNumber(session), predicate.toString());
        return false;
    }
    // Names of all variables referenced by the predicate.
    Set<String> predicateVariables = HivePushdownUtil.extractAll(predicate).stream().map(VariableReferenceExpression::getName).collect(Collectors.toSet());
    // Restrict to the columns the predicate actually uses: handle -> column name.
    Map<ColumnHandle, String> allColumns = columnHandlesMap.entrySet().stream().filter(entry -> predicateVariables.contains(entry.getKey())).collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
    // Column name -> type, resolved through connector metadata.
    Map<String, Type> allColumnTypes = allColumns.entrySet().stream().collect(toImmutableMap(entry -> entry.getValue(), entry -> metadata.getColumnMetadata(session, tableHandle, entry.getKey()).getType()));
    Map<Symbol, Type> symbolsMap = typesMap.entrySet().stream().collect(Collectors.toMap(entry -> new Symbol(entry.getKey()), entry -> entry.getValue()));
    // BUGFIX: the original code called symbolsMap.containsKey(key) with a String key
    // against a Map<Symbol, Type>, so the check always returned false and every entry
    // was (re)inserted. Wrapping the name in a Symbol makes the existence check
    // meaningful; putIfAbsent preserves the intended "don't overwrite" semantics.
    allColumnTypes.forEach((name, type) -> symbolsMap.putIfAbsent(new Symbol(name), type));
    TableStatistics filterStatistics = filterCalculatorService.filterStats(statistics, predicate, session, allColumns, allColumnTypes, symbolsMap, formSymbolsLayout(allColumns));
    // Fall back to the unfiltered row count when the estimate is unknown (factor = 1.0 -> no offload unless threshold >= 1).
    Estimate filteredRowCount = filterStatistics.getRowCount().isUnknown() ? statistics.getRowCount() : filterStatistics.getRowCount();
    double filterFactor = filteredRowCount.getValue() / statistics.getRowCount().getValue();
    if (filterFactor <= HiveSessionProperties.getFilterOffloadFactor(session)) {
        log.info("Offloading: table %s, size[%d], predicate[%s], filter factor[%.2f%%].", tableHandle.getTableName(), (long) statistics.getRowCount().getValue(), predicate.toString(), filterFactor * 100);
        return true;
    } else {
        log.info("No need to offload: table %s, size[%d], predicate[%s], filter factor[%.2f%%].", tableHandle.getTableName(), (long) statistics.getRowCount().getValue(), predicate.toString(), filterFactor * 100);
    }
    return false;
}
Use of io.prestosql.spi.plan.FilterStatsCalculatorService in project boostkit-bigdata by kunpengcompute.
In class TestHivePlanOptimizerProvider, the method testProvider:
@Test
public void testProvider() {
    // Assemble the collaborators HivePlanOptimizerProvider needs, all backed by OFFLOAD_METADATA.
    RowExpressionService rowExpressionService = new ConnectorRowExpressionService(
            new RowExpressionDomainTranslator(OFFLOAD_METADATA),
            new RowExpressionDeterminismEvaluator(OFFLOAD_METADATA));
    HiveTransactionManager txManager = simulationHiveTransactionManager();
    StandardFunctionResolution functionResolution = new FunctionResolution(OFFLOAD_METADATA.getFunctionAndTypeManager());
    HivePartitionManager hivePartitionManager = new HivePartitionManager(OFFLOAD_METADATA.getFunctionAndTypeManager(), 1, false, 1);

    // Filter-statistics service built from the scalar calculator and normalizer.
    FilterStatsCalculator filterStatsCalculator = new FilterStatsCalculator(
            OFFLOAD_METADATA,
            new ScalarStatsCalculator(OFFLOAD_METADATA),
            new StatsNormalizer());
    FilterStatsCalculatorService statsCalculatorService = new ConnectorFilterStatsCalculatorService(filterStatsCalculator);

    // The metadata factory is mocked to hand out a simulated HiveMetadata.
    HiveMetadataFactory metadataFactory = Mockito.mock(HiveMetadataFactory.class);
    Mockito.when(metadataFactory.get()).thenReturn(simulationHiveMetadata());

    HivePlanOptimizerProvider provider = new HivePlanOptimizerProvider(
            txManager,
            rowExpressionService,
            functionResolution,
            hivePartitionManager,
            OFFLOAD_METADATA.getFunctionAndTypeManager(),
            statsCalculatorService,
            metadataFactory);

    // The provider is expected to expose three logical and three physical optimizers.
    assertEquals(provider.getLogicalPlanOptimizers().size(), 3);
    assertEquals(provider.getPhysicalPlanOptimizers().size(), 3);
}
Use of io.prestosql.spi.plan.FilterStatsCalculatorService in project boostkit-bigdata by kunpengcompute.
In class TestHiveFilterPushdown, the method createOptimizer:
/**
 * Builds a {@link HiveFilterPushdown} optimizer wired against the shared
 * OFFLOAD_METADATA test fixture.
 */
private static HiveFilterPushdown createOptimizer() {
    RowExpressionService rowExpressionService = new ConnectorRowExpressionService(
            new RowExpressionDomainTranslator(OFFLOAD_METADATA),
            new RowExpressionDeterminismEvaluator(OFFLOAD_METADATA));
    StandardFunctionResolution functionResolution = new FunctionResolution(OFFLOAD_METADATA.getFunctionAndTypeManager());
    HivePartitionManager hivePartitionManager = new HivePartitionManager(OFFLOAD_METADATA.getFunctionAndTypeManager(), 1, false, 1);
    // Filter-statistics service assembled from scalar calculator + normalizer.
    FilterStatsCalculator filterStatsCalculator = new FilterStatsCalculator(
            OFFLOAD_METADATA,
            new ScalarStatsCalculator(OFFLOAD_METADATA),
            new StatsNormalizer());
    return new HiveFilterPushdown(
            simulationHiveTransactionManager(),
            rowExpressionService,
            functionResolution,
            hivePartitionManager,
            new ConnectorFilterStatsCalculatorService(filterStatsCalculator),
            OFFLOAD_METADATA.getFunctionAndTypeManager());
}
Use of io.prestosql.spi.plan.FilterStatsCalculatorService in project boostkit-bigdata by kunpengcompute.
In class HiveFilterPushdown, the method pushdownFilter:
/**
 * Attempts to push {@code predicate} down into the Hive table handle.
 * Extracts the offloadable part of the predicate, merges it with any filter already
 * carried by the handle, and — if offloading is judged beneficial — returns a new
 * table handle carrying the combined filter plus the expression that must still be
 * evaluated by the engine. When nothing can (or should) be offloaded, returns an
 * empty handle with {@code TRUE_CONSTANT} so the caller keeps the original plan.
 *
 * @throws IllegalArgumentException if the predicate is constant-false or the handle
 *         is not a {@link HiveTableHandle}
 */
private static ConnectorPushdownFilterResult pushdownFilter(HiveMetadata metadata, ConnectorSession session, ConnectorTableHandle tableHandle, RowExpression predicate, Map<String, Type> typesMap, RowExpressionService rowExpressionService, StandardFunctionResolution functionResolution, FunctionMetadataManager functionMetadataManager, FilterStatsCalculatorService filterCalculatorService) {
    checkArgument(!FALSE_CONSTANT.equals(predicate), "Cannot pushdown filter that is always false");
    checkArgument(tableHandle instanceof HiveTableHandle, "Only supports hive TableHandle");
    LogicalRowExpressions logicalRowExpressions = new LogicalRowExpressions(rowExpressionService.getDeterminismEvaluator(), functionResolution, functionMetadataManager);
    // Split the predicate into the part we can offload and the remainder.
    ExpressionExtractResult expressionExtractResult = extractOffloadExpression(predicate, logicalRowExpressions, rowExpressionService);
    if (TRUE_CONSTANT.equals(expressionExtractResult.getOffloadExpression())) {
        // Nothing offloadable: leave the plan untouched.
        return new ConnectorPushdownFilterResult(Optional.empty(), TRUE_CONSTANT);
    }
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    Map<String, ColumnHandle> columnHandlesMap = metadata.getColumnHandles(session, tableHandle);
    HiveOffloadExpression oldOffloadExpression = hiveTableHandle.getOffloadExpression();
    // Combine with a filter already carried by the handle, if any.
    RowExpression filterExpression = TRUE_CONSTANT.equals(oldOffloadExpression.getFilterExpression()) ? expressionExtractResult.getOffloadExpression() : logicalRowExpressions.combineConjuncts(oldOffloadExpression.getFilterExpression(), expressionExtractResult.getOffloadExpression());
    RowExpression optimizedExpression = filterExpression;
    // Idiom fix: "true != x" replaced with the direct negation "!x".
    if (!determineOffloadExpression(optimizedExpression, tableHandle, metadata, session, rowExpressionService, columnHandlesMap, filterCalculatorService, typesMap)) {
        return new ConnectorPushdownFilterResult(Optional.empty(), TRUE_CONSTANT);
    }
    // Columns the offloaded filter references; recorded on the new handle.
    Set<HiveColumnHandle> offloadColumns = HivePushdownUtil.extractAll(optimizedExpression).stream().map(entry -> (HiveColumnHandle) columnHandlesMap.get(entry.getName())).collect(Collectors.toSet());
    Optional<ConnectorTableHandle> newTableHandle = Optional.of(hiveTableHandle.withOffloadExpression(oldOffloadExpression.updateFilter(optimizedExpression, offloadColumns)));
    return new ConnectorPushdownFilterResult(newTableHandle, expressionExtractResult.getRemainingExpression());
}
Aggregations