Search in sources :

Example 1 with HiveConfPlannerContext

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext in project hive by apache.

the class CalcitePlanner method createPlanner.

/**
 * Creates a planner through {@code HiveVolcanoPlanner}, handing it a
 * {@code HivePlannerContext} assembled from values read out of {@code conf}.
 *
 * @param conf                   Hive configuration; supplies the max split size, the
 *                               no-conditional-task join threshold, the local time zone
 *                               and the correlated multi-key join statistics flag
 * @param corrScalarRexSQWithAgg forwarded unchanged to the planner context
 * @param scalarAggNoGbyNoWin    forwarded unchanged to the planner context
 * @return the planner returned by {@code HiveVolcanoPlanner.createPlanner}
 */
private static RelOptPlanner createPlanner(HiveConf conf, Set<RelNode> corrScalarRexSQWithAgg, Set<RelNode> scalarAggNoGbyNoWin) {
    // Limits handed to the algorithms configuration, widened from long to double.
    final Double splitSizeLimit = Double.valueOf((double) HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE));
    final Double memoryLimit = Double.valueOf((double) HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD));
    final HiveAlgorithmsConf costSettings = new HiveAlgorithmsConf(splitSizeLimit, memoryLimit);
    final HiveRulesRegistry rulesRegistry = new HiveRulesRegistry();

    // Calcite connection properties: time zone taken from conf, materializations switched off.
    final Properties connectionProps = new Properties();
    final String timeZoneKey = CalciteConnectionProperty.TIME_ZONE.camelName();
    final String timeZoneId = conf.getLocalTimeZone().getId();
    connectionProps.setProperty(timeZoneKey, timeZoneId);
    final String materializationsKey = CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName();
    connectionProps.setProperty(materializationsKey, Boolean.FALSE.toString());
    final CalciteConnectionConfig connectionConfig = new CalciteConnectionConfigImpl(connectionProps);

    final boolean correlatedColumns = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_CORRELATED_MULTI_KEY_JOINS);
    final HiveConfPlannerContext confPlannerContext = new HiveConfPlannerContext(correlatedColumns);

    final HivePlannerContext plannerContext = new HivePlannerContext(
        costSettings,
        rulesRegistry,
        connectionConfig,
        corrScalarRexSQWithAgg,
        scalarAggNoGbyNoWin,
        confPlannerContext);
    return HiveVolcanoPlanner.createPlanner(plannerContext);
}
Also used : CalciteConnectionConfigImpl(org.apache.calcite.config.CalciteConnectionConfigImpl) HivePlannerContext(org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext) HiveRulesRegistry(org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry) CalciteConnectionConfig(org.apache.calcite.config.CalciteConnectionConfig) HiveConfPlannerContext(org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext) QueryProperties(org.apache.hadoop.hive.ql.QueryProperties) Properties(java.util.Properties) HiveAlgorithmsConf(org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf)

Example 2 with HiveConfPlannerContext

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext in project hive by apache.

the class FilterSelectivityEstimator method visitCall.

/**
 * Estimates the selectivity of a single call inside a filter predicate.
 *
 * <p>Returns {@code 1.0} straight away when this estimator is not in "deep" mode or
 * when the call is a predicate on a partition column. Otherwise the estimate depends
 * on the operator kind:
 * <ul>
 *   <li>{@code AND} / {@code OR}: delegated to the conjunction / disjunction helpers;</li>
 *   <li>{@code NOT} / {@code NOT_EQUALS}: delegated to the not-equality helper;</li>
 *   <li>{@code IS_NOT_NULL}: over a {@code HiveTableScan}, derived from the null count
 *       and the row count; if the null count exceeds the row count a warning is
 *       emitted and 1/3 is used. Over any other input the not-equality helper is used;</li>
 *   <li>range comparisons: a fixed 1/3;</li>
 *   <li>{@code IN}: the function selectivity scaled by the number of operands minus one,
 *       clamped as described inline;</li>
 *   <li>anything else: the function selectivity helper.</li>
 * </ul>
 *
 * @param call the call expression to estimate
 * @return the estimated selectivity; may be {@code null} if the function selectivity
 *         helper returns {@code null}
 */
@Override
public Double visitCall(RexCall call) {
    if (!deep) {
        return 1.0;
    }
    /*
     * Ignore any predicates on partition columns because we have already
     * accounted for these in the Table row count.
     */
    if (isPartitionPredicate(call, this.childRel)) {
        return 1.0;
    }
    Double selectivity = null;
    SqlKind op = getOp(call);
    switch(op) {
        case AND:
            {
                selectivity = computeConjunctionSelectivity(call);
                break;
            }
        case OR:
            {
                selectivity = computeDisjunctionSelectivity(call);
                break;
            }
        case NOT:
        case NOT_EQUALS:
            {
                selectivity = computeNotEqualitySelectivity(call);
                break;
            }
        case IS_NOT_NULL:
            {
                if (childRel instanceof HiveTableScan) {
                    double noOfNulls = getMaxNulls(call, (HiveTableScan) childRel);
                    double totalNoOfTuples = mq.getRowCount(childRel);
                    if (totalNoOfTuples >= noOfNulls) {
                        // Math.max guards the division when the row count is below 1.
                        selectivity = (totalNoOfTuples - noOfNulls) / Math.max(totalNoOfTuples, 1);
                    } else {
                        // If we are running explain, we will print the warning in the console
                        // and the log files. Otherwise, we just print it in the log files.
                        HiveConfPlannerContext ctx = childRel.getCluster().getPlanner().getContext().unwrap(HiveConfPlannerContext.class);
                        String msg = "Invalid statistics: Number of null values > number of tuples. " + "Consider recomputing statistics for table: " + ((RelOptHiveTable) childRel.getTable()).getHiveTableMD().getFullyQualifiedName();
                        // unwrap() yields null when the planner context carries no
                        // HiveConfPlannerContext; treat that as "not an explain run"
                        // rather than failing with a NullPointerException.
                        if (ctx != null && ctx.isExplainPlan()) {
                            SessionState.getConsole().printError("WARNING: " + msg);
                        }
                        LOG.warn(msg);
                        // Statistics are unusable here, so fall back to a fixed estimate.
                        selectivity = ((double) 1 / (double) 3);
                    }
                } else {
                    selectivity = computeNotEqualitySelectivity(call);
                }
                break;
            }
        case LESS_THAN_OR_EQUAL:
        case GREATER_THAN_OR_EQUAL:
        case LESS_THAN:
        case GREATER_THAN:
            {
                selectivity = ((double) 1 / (double) 3);
                break;
            }
        case IN:
            {
                // TODO: 1) check for duplicates 2) We assume in clause values to be
                // present in NDV which may not be correct (Range check can find it) 3) We
                // assume values in NDV set is uniformly distributed over col values
                // (account for skewness - histogram).
                selectivity = computeFunctionSelectivity(call);
                if (selectivity != null) {
                    // Scale by (operand count - 1), then clamp: non-positive results
                    // become 0.10 and results of 1.0 or more are capped at 1.0.
                    selectivity = selectivity * (call.operands.size() - 1);
                    if (selectivity <= 0.0) {
                        selectivity = 0.10;
                    } else if (selectivity >= 1.0) {
                        selectivity = 1.0;
                    }
                }
                break;
            }
        default:
            selectivity = computeFunctionSelectivity(call);
    }
    return selectivity;
}
Also used : HiveConfPlannerContext(org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext) SqlKind(org.apache.calcite.sql.SqlKind) HiveTableScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan)

Example 3 with HiveConfPlannerContext

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext in project hive by apache.

the class CalcitePlanner method createPlanner.

/**
 * Creates a planner through {@code HiveVolcanoPlanner}, handing it a
 * {@code HivePlannerContext} assembled from values read out of {@code conf}, and
 * attaches a {@code RuleEventLogger} listener before returning it.
 *
 * @param conf                   Hive configuration; supplies the max split size, the
 *                               no-conditional-task join threshold, the local time zone,
 *                               the correlated multi-key join statistics flag and the
 *                               materialized view rewriting selection strategy
 * @param corrScalarRexSQWithAgg forwarded unchanged to the planner context
 * @param statsSource            forwarded unchanged to the planner context
 * @param isExplainPlan          forwarded to the {@code HiveConfPlannerContext}
 * @return the planner with the rule event listener registered
 */
private static RelOptPlanner createPlanner(HiveConf conf, Set<RelNode> corrScalarRexSQWithAgg, StatsSource statsSource, boolean isExplainPlan) {
    // Limits handed to the algorithms configuration, widened from long to double.
    final Double splitSizeLimit = Double.valueOf((double) HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE));
    final Double memoryLimit = Double.valueOf((double) HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD));
    final HiveAlgorithmsConf costSettings = new HiveAlgorithmsConf(splitSizeLimit, memoryLimit);
    final HiveRulesRegistry rulesRegistry = new HiveRulesRegistry();

    // Calcite connection properties: time zone taken from conf, materializations switched off.
    final Properties connectionProps = new Properties();
    final String timeZoneKey = CalciteConnectionProperty.TIME_ZONE.camelName();
    final String timeZoneId = conf.getLocalTimeZone().getId();
    connectionProps.setProperty(timeZoneKey, timeZoneId);
    final String materializationsKey = CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName();
    connectionProps.setProperty(materializationsKey, Boolean.FALSE.toString());
    final CalciteConnectionConfig connectionConfig = new CalciteConnectionConfigImpl(connectionProps);

    // Flags carried by the conf-derived planner context.
    final boolean correlatedColumns = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS);
    final String selectionStrategy = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY);
    final boolean heuristicStrategy = selectionStrategy.equals("heuristic");
    final HiveConfPlannerContext confPlannerContext = new HiveConfPlannerContext(correlatedColumns, heuristicStrategy, isExplainPlan);

    final HivePlannerContext plannerContext = new HivePlannerContext(
        costSettings,
        rulesRegistry,
        connectionConfig,
        corrScalarRexSQWithAgg,
        confPlannerContext,
        statsSource);

    final RelOptPlanner volcanoPlanner = HiveVolcanoPlanner.createPlanner(plannerContext);
    volcanoPlanner.addListener(new RuleEventLogger());
    return volcanoPlanner;
}
Also used : CalciteConnectionConfigImpl(org.apache.calcite.config.CalciteConnectionConfigImpl) HivePlannerContext(org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext) HiveRulesRegistry(org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry) CalciteConnectionConfig(org.apache.calcite.config.CalciteConnectionConfig) HiveConfPlannerContext(org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext) QueryProperties(org.apache.hadoop.hive.ql.QueryProperties) Properties(java.util.Properties) RelOptPlanner(org.apache.calcite.plan.RelOptPlanner) HiveAlgorithmsConf(org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf) RuleEventLogger(org.apache.hadoop.hive.ql.optimizer.calcite.RuleEventLogger)

Aggregations

HiveConfPlannerContext (org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext)3 Properties (java.util.Properties)2 CalciteConnectionConfig (org.apache.calcite.config.CalciteConnectionConfig)2 CalciteConnectionConfigImpl (org.apache.calcite.config.CalciteConnectionConfigImpl)2 QueryProperties (org.apache.hadoop.hive.ql.QueryProperties)2 HivePlannerContext (org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext)2 HiveAlgorithmsConf (org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf)2 HiveRulesRegistry (org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry)2 RelOptPlanner (org.apache.calcite.plan.RelOptPlanner)1 SqlKind (org.apache.calcite.sql.SqlKind)1 RuleEventLogger (org.apache.hadoop.hive.ql.optimizer.calcite.RuleEventLogger)1 HiveTableScan (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan)1