Search in sources :

Example 1 with HiveMetadata

use of io.prestosql.plugin.hive.HiveMetadata in project boostkit-bigdata by kunpengcompute.

the class HiveFilterPushdown method evaluateFilterBenefit.

/**
 * Decides, from table statistics, whether offloading the given filter predicate is worthwhile.
 *
 * <p>The method returns {@code false} immediately when the table row count is unknown or is
 * smaller than the session's minimum offload row number. Otherwise it asks
 * {@code filterCalculatorService} for the statistics after applying {@code predicate} and
 * compares the ratio "filtered rows / table rows" with the session's filter offload factor.
 *
 * @param tableHandle handle of the table being scanned
 * @param columnHandlesMap column handles keyed by the variable name used in the predicate
 * @param metadata source of table statistics and column metadata
 * @param filterCalculatorService service used to estimate the statistics after filtering
 * @param predicate filter expression that is a candidate for offloading
 * @param constraint constraint passed through to the table statistics lookup
 * @param session current connector session (supplies the offload thresholds)
 * @param typesMap known types keyed by symbol name; these take precedence over the types
 *        looked up from column metadata
 * @return {@code true} if the estimated filter factor is less than or equal to the session's
 *         filter offload factor, {@code false} otherwise
 */
private static boolean evaluateFilterBenefit(ConnectorTableHandle tableHandle, Map<String, ColumnHandle> columnHandlesMap, HiveMetadata metadata, FilterStatsCalculatorService filterCalculatorService, RowExpression predicate, Constraint constraint, ConnectorSession session, Map<String, Type> typesMap) {
    TableStatistics statistics = metadata.getTableStatistics(session, tableHandle, constraint, true);
    Estimate tableRowCount = statistics.getRowCount();
    if (tableRowCount.isUnknown() || tableRowCount.getValue() < HiveSessionProperties.getMinOffloadRowNumber(session)) {
        log.info("Filter:Table %s row number[%d], expect min row number[%d], predicate[%s].", tableHandle.getTableName(), (long) tableRowCount.getValue(), HiveSessionProperties.getMinOffloadRowNumber(session), predicate.toString());
        return false;
    }

    // Restrict the column handles to those actually referenced by the predicate.
    Set<String> predicateVariables = HivePushdownUtil.extractAll(predicate).stream().map(VariableReferenceExpression::getName).collect(Collectors.toSet());
    Map<ColumnHandle, String> allColumns = columnHandlesMap.entrySet().stream().filter(entry -> predicateVariables.contains(entry.getKey())).collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
    Map<String, Type> allColumnTypes = allColumns.entrySet().stream().collect(toImmutableMap(Map.Entry::getValue, entry -> metadata.getColumnMetadata(session, tableHandle, entry.getKey()).getType()));

    Map<Symbol, Type> symbolsMap = typesMap.entrySet().stream().collect(Collectors.toMap(entry -> new Symbol(entry.getKey()), Map.Entry::getValue));
    // The map is keyed by Symbol, so the lookup must use a Symbol too. The previous
    // containsKey(String) check could never match and therefore always overwrote the
    // entries that came from typesMap.
    allColumnTypes.forEach((name, type) -> symbolsMap.putIfAbsent(new Symbol(name), type));

    TableStatistics filterStatistics = filterCalculatorService.filterStats(statistics, predicate, session, allColumns, allColumnTypes, symbolsMap, formSymbolsLayout(allColumns));
    // Fall back to the unfiltered row count when the filtered estimate is unknown.
    Estimate filteredRowCount = filterStatistics.getRowCount().isUnknown() ? tableRowCount : filterStatistics.getRowCount();
    // If tableRowCount is zero the ratio is NaN or infinite; the comparison below is then
    // false and the filter is not offloaded.
    double filterFactor = filteredRowCount.getValue() / tableRowCount.getValue();
    if (filterFactor <= HiveSessionProperties.getFilterOffloadFactor(session)) {
        log.info("Offloading: table %s, size[%d], predicate[%s], filter factor[%.2f%%].", tableHandle.getTableName(), (long) tableRowCount.getValue(), predicate.toString(), filterFactor * 100);
        return true;
    }
    log.info("No need to offload: table %s, size[%d], predicate[%s], filter factor[%.2f%%].", tableHandle.getTableName(), (long) tableRowCount.getValue(), predicate.toString(), filterFactor * 100);
    return false;
}
Also used : ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) HivePartitionManager(io.prestosql.plugin.hive.HivePartitionManager) TableStatistics(io.prestosql.spi.statistics.TableStatistics) HiveTableHandle(io.prestosql.plugin.hive.HiveTableHandle) ConstantExpression(io.prestosql.spi.relation.ConstantExpression) FALSE_CONSTANT(io.prestosql.expressions.LogicalRowExpressions.FALSE_CONSTANT) LogicalRowExpressions(io.prestosql.expressions.LogicalRowExpressions) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) NullableValue(io.prestosql.spi.predicate.NullableValue) CallExpression(io.prestosql.spi.relation.CallExpression) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) FilterNode(io.prestosql.spi.plan.FilterNode) Map(java.util.Map) FunctionMetadataManager(io.prestosql.spi.function.FunctionMetadataManager) Type(io.prestosql.spi.type.Type) RowExpressionNodeInliner.replaceExpression(io.prestosql.expressions.RowExpressionNodeInliner.replaceExpression) Constraint(io.prestosql.spi.connector.Constraint) BiMap(com.google.common.collect.BiMap) ImmutableSet(com.google.common.collect.ImmutableSet) DomainTranslator(io.prestosql.spi.relation.DomainTranslator) PlanVisitor(io.prestosql.spi.plan.PlanVisitor) TableScanNode(io.prestosql.spi.plan.TableScanNode) Set(java.util.Set) PlanNode(io.prestosql.spi.plan.PlanNode) Collectors(java.util.stream.Collectors) Preconditions.checkState(com.google.common.base.Preconditions.checkState) RowExpressionService(io.prestosql.spi.relation.RowExpressionService) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HiveOffloadExpression(io.prestosql.plugin.hive.HiveOffloadExpression) Optional(java.util.Optional) LogicalRowExpressions.extractConjuncts(io.prestosql.expressions.LogicalRowExpressions.extractConjuncts) 
TRUE_CONSTANT(io.prestosql.expressions.LogicalRowExpressions.TRUE_CONSTANT) Logger(io.airlift.log.Logger) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) StandardFunctionResolution(io.prestosql.spi.function.StandardFunctionResolution) TableHandle(io.prestosql.spi.metadata.TableHandle) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HiveTransactionManager(io.prestosql.plugin.hive.HiveTransactionManager) LinkedHashMap(java.util.LinkedHashMap) ImmutableList(com.google.common.collect.ImmutableList) OmniExpressionChecker(com.huawei.boostkit.omnidata.expression.OmniExpressionChecker) HivePushdownUtil.isColumnsCanOffload(io.prestosql.plugin.hive.rule.HivePushdownUtil.isColumnsCanOffload) Objects.requireNonNull(java.util.Objects.requireNonNull) ConnectorPlanOptimizer(io.prestosql.spi.ConnectorPlanOptimizer) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) HiveMetadata(io.prestosql.plugin.hive.HiveMetadata) Symbol(io.prestosql.spi.plan.Symbol) HivePushdownUtil.checkStorageFormat(io.prestosql.plugin.hive.rule.HivePushdownUtil.checkStorageFormat) FilterStatsCalculatorService(io.prestosql.spi.plan.FilterStatsCalculatorService) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) SymbolAllocator(io.prestosql.spi.SymbolAllocator) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) Estimate(io.prestosql.spi.statistics.Estimate) ValuesNode(io.prestosql.spi.plan.ValuesNode) BASIC_COLUMN_EXTRACTOR(io.prestosql.spi.relation.DomainTranslator.BASIC_COLUMN_EXTRACTOR) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Sets.intersection(com.google.common.collect.Sets.intersection) PlanNodeIdAllocator(io.prestosql.spi.plan.PlanNodeIdAllocator) RowExpression(io.prestosql.spi.relation.RowExpression) 
ImmutableBiMap.toImmutableBiMap(com.google.common.collect.ImmutableBiMap.toImmutableBiMap) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Estimate(io.prestosql.spi.statistics.Estimate) Symbol(io.prestosql.spi.plan.Symbol) Type(io.prestosql.spi.type.Type) TableStatistics(io.prestosql.spi.statistics.TableStatistics) Map(java.util.Map) BiMap(com.google.common.collect.BiMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) LinkedHashMap(java.util.LinkedHashMap) ImmutableBiMap.toImmutableBiMap(com.google.common.collect.ImmutableBiMap.toImmutableBiMap)

Example 2 with HiveMetadata

use of io.prestosql.plugin.hive.HiveMetadata in project boostkit-bigdata by kunpengcompute.

the class HiveFilterPushdown method getMetadata.

/**
 * Looks up the connector metadata bound to the transaction of {@code tableHandle} and
 * returns it as {@link HiveMetadata}.
 *
 * @param tableHandle table handle whose transaction identifies the metadata instance
 * @return the metadata registered for that transaction, cast to {@code HiveMetadata}
 * @throws IllegalStateException if the registered metadata is not a {@code HiveMetadata}
 */
private HiveMetadata getMetadata(TableHandle tableHandle) {
    ConnectorMetadata connectorMetadata = transactionManager.get(tableHandle.getTransaction());
    boolean isHiveMetadata = connectorMetadata instanceof HiveMetadata;
    checkState(isHiveMetadata, "metadata must be HiveMetadata");
    return (HiveMetadata) connectorMetadata;
}
Also used : HiveMetadata(io.prestosql.plugin.hive.HiveMetadata) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata)

Example 3 with HiveMetadata

use of io.prestosql.plugin.hive.HiveMetadata in project boostkit-bigdata by kunpengcompute.

the class TestHivePartialAggregationPushdown method createOptimizer.

/**
 * Builds a {@link HivePartialAggregationPushdown} wired with a simulated transaction manager
 * and a mocked {@link HiveMetadataFactory} that hands out simulated Hive metadata.
 *
 * @return the optimizer instance under test
 */
private static HivePartialAggregationPushdown createOptimizer() {
    // Mocked factory whose get() yields the simulated metadata.
    HiveMetadataFactory metadataFactory = Mockito.mock(HiveMetadataFactory.class);
    HiveMetadata simulatedMetadata = simulationHiveMetadata();
    Mockito.when(metadataFactory.get()).thenReturn(simulatedMetadata);

    StandardFunctionResolution functionResolution = new FunctionResolution(OFFLOAD_METADATA.getFunctionAndTypeManager());
    return new HivePartialAggregationPushdown(
            simulationHiveTransactionManager(),
            OFFLOAD_METADATA.getFunctionAndTypeManager(),
            functionResolution,
            metadataFactory);
}
Also used : HiveMetadataFactory(io.prestosql.plugin.hive.HiveMetadataFactory) HiveMetadata(io.prestosql.plugin.hive.HiveMetadata) TestHivePushdownUtil.simulationHiveMetadata(io.prestosql.plugin.hive.rule.TestHivePushdownUtil.simulationHiveMetadata) StandardFunctionResolution(io.prestosql.spi.function.StandardFunctionResolution) StandardFunctionResolution(io.prestosql.spi.function.StandardFunctionResolution) FunctionResolution(io.prestosql.sql.relational.FunctionResolution)

Example 4 with HiveMetadata

use of io.prestosql.plugin.hive.HiveMetadata in project boostkit-bigdata by kunpengcompute.

the class TestHivePlanOptimizerProvider method testProvider.

/**
 * Verifies that {@link HivePlanOptimizerProvider} exposes three logical and three physical
 * plan optimizers when constructed with simulated collaborators.
 */
@Test
public void testProvider() {
    // Expression, transaction, function-resolution and partition collaborators.
    RowExpressionService rowExpressionService = new ConnectorRowExpressionService(
            new RowExpressionDomainTranslator(OFFLOAD_METADATA),
            new RowExpressionDeterminismEvaluator(OFFLOAD_METADATA));
    HiveTransactionManager hiveTransactionManager = simulationHiveTransactionManager();
    StandardFunctionResolution functionResolution = new FunctionResolution(OFFLOAD_METADATA.getFunctionAndTypeManager());
    HivePartitionManager hivePartitionManager = new HivePartitionManager(OFFLOAD_METADATA.getFunctionAndTypeManager(), 1, false, 1);

    // Filter statistics calculation chain.
    FilterStatsCalculator filterStatsCalculator = new FilterStatsCalculator(
            OFFLOAD_METADATA,
            new ScalarStatsCalculator(OFFLOAD_METADATA),
            new StatsNormalizer());
    FilterStatsCalculatorService filterStatsService = new ConnectorFilterStatsCalculatorService(filterStatsCalculator);

    // Mocked metadata factory whose get() yields the simulated metadata.
    HiveMetadataFactory metadataFactory = Mockito.mock(HiveMetadataFactory.class);
    HiveMetadata simulatedMetadata = simulationHiveMetadata();
    Mockito.when(metadataFactory.get()).thenReturn(simulatedMetadata);

    HivePlanOptimizerProvider provider = new HivePlanOptimizerProvider(
            hiveTransactionManager,
            rowExpressionService,
            functionResolution,
            hivePartitionManager,
            OFFLOAD_METADATA.getFunctionAndTypeManager(),
            filterStatsService,
            metadataFactory);

    assertEquals(provider.getLogicalPlanOptimizers().size(), 3);
    assertEquals(provider.getPhysicalPlanOptimizers().size(), 3);
}
Also used : ConnectorFilterStatsCalculatorService(io.prestosql.cost.ConnectorFilterStatsCalculatorService) RowExpressionDeterminismEvaluator(io.prestosql.sql.relational.RowExpressionDeterminismEvaluator) StatsNormalizer(io.prestosql.cost.StatsNormalizer) HiveMetadataFactory(io.prestosql.plugin.hive.HiveMetadataFactory) ConnectorRowExpressionService(io.prestosql.sql.relational.ConnectorRowExpressionService) HivePartitionManager(io.prestosql.plugin.hive.HivePartitionManager) FilterStatsCalculatorService(io.prestosql.spi.plan.FilterStatsCalculatorService) ConnectorFilterStatsCalculatorService(io.prestosql.cost.ConnectorFilterStatsCalculatorService) ScalarStatsCalculator(io.prestosql.cost.ScalarStatsCalculator) RowExpressionDomainTranslator(io.prestosql.sql.relational.RowExpressionDomainTranslator) FilterStatsCalculator(io.prestosql.cost.FilterStatsCalculator) StandardFunctionResolution(io.prestosql.spi.function.StandardFunctionResolution) FunctionResolution(io.prestosql.sql.relational.FunctionResolution) RowExpressionService(io.prestosql.spi.relation.RowExpressionService) ConnectorRowExpressionService(io.prestosql.sql.relational.ConnectorRowExpressionService) TestHivePushdownUtil.simulationHiveMetadata(io.prestosql.plugin.hive.rule.TestHivePushdownUtil.simulationHiveMetadata) HiveMetadata(io.prestosql.plugin.hive.HiveMetadata) StandardFunctionResolution(io.prestosql.spi.function.StandardFunctionResolution) HiveTransactionManager(io.prestosql.plugin.hive.HiveTransactionManager) TestHivePushdownUtil.simulationHiveTransactionManager(io.prestosql.plugin.hive.rule.TestHivePushdownUtil.simulationHiveTransactionManager) Test(org.testng.annotations.Test)

Example 5 with HiveMetadata

use of io.prestosql.plugin.hive.HiveMetadata in project boostkit-bigdata by kunpengcompute.

the class TestHivePushdownUtil method simulationHiveTransactionManager.

/**
 * Creates a mocked {@link HiveTransactionManager} that returns the simulated Hive metadata
 * when asked for the transaction of {@code OFFLOAD_TABLE_HANDLE}.
 *
 * @return the stubbed transaction manager
 */
protected static HiveTransactionManager simulationHiveTransactionManager() {
    HiveMetadata simulatedMetadata = simulationHiveMetadata();
    HiveTransactionManager mockedManager = Mockito.mock(HiveTransactionManager.class);
    Mockito.when(mockedManager.get(OFFLOAD_TABLE_HANDLE.getTransaction()))
            .thenReturn(simulatedMetadata);
    return mockedManager;
}
Also used : HiveMetadata(io.prestosql.plugin.hive.HiveMetadata) HiveTransactionManager(io.prestosql.plugin.hive.HiveTransactionManager)

Aggregations

HiveMetadata (io.prestosql.plugin.hive.HiveMetadata)7 HiveTransactionManager (io.prestosql.plugin.hive.HiveTransactionManager)4 StandardFunctionResolution (io.prestosql.spi.function.StandardFunctionResolution)4 HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle)3 HivePartitionManager (io.prestosql.plugin.hive.HivePartitionManager)3 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)3 ConnectorMetadata (io.prestosql.spi.connector.ConnectorMetadata)3 ConnectorTableMetadata (io.prestosql.spi.connector.ConnectorTableMetadata)3 Constraint (io.prestosql.spi.connector.Constraint)3 FilterStatsCalculatorService (io.prestosql.spi.plan.FilterStatsCalculatorService)3 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)2 Preconditions.checkState (com.google.common.base.Preconditions.checkState)2 BiMap (com.google.common.collect.BiMap)2 ImmutableBiMap.toImmutableBiMap (com.google.common.collect.ImmutableBiMap.toImmutableBiMap)2 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)2 Sets.intersection (com.google.common.collect.Sets.intersection)2 OmniExpressionChecker (com.huawei.boostkit.omnidata.expression.OmniExpressionChecker)2