Use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext in project hive by apache.
The class CalcitePlanner, method createPlanner.
private static RelOptPlanner createPlanner(HiveConf conf, Set<RelNode> corrScalarRexSQWithAgg,
    Set<RelNode> scalarAggNoGbyNoWin) {
  final Double maxSplitSize = (double) HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
  final Double maxMemory =
      (double) HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
  HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
  HiveRulesRegistry registry = new HiveRulesRegistry();
  Properties calciteConfigProperties = new Properties();
  calciteConfigProperties.setProperty(CalciteConnectionProperty.TIME_ZONE.camelName(),
      conf.getLocalTimeZone().getId());
  calciteConfigProperties.setProperty(CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(),
      Boolean.FALSE.toString());
  CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(calciteConfigProperties);
  boolean isCorrelatedColumns = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_CORRELATED_MULTI_KEY_JOINS);
  // Bundle the cost model, rules registry, Calcite connection config, subquery metadata,
  // and the Hive-specific flag into one context that the Volcano planner carries around.
  HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig,
      corrScalarRexSQWithAgg, scalarAggNoGbyNoWin, new HiveConfPlannerContext(isCorrelatedColumns));
  return HiveVolcanoPlanner.createPlanner(confContext);
}
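Once created, the planner carries the HiveConfPlannerContext with it, and any rule or estimator that can reach the planner can read the flags back, as FilterSelectivityEstimator does below via childRel.getCluster().getPlanner(). A minimal sketch of that retrieval, assuming planner is the RelOptPlanner returned by createPlanner (the helper name hiveContextOf is hypothetical):

/** Hypothetical helper: reads the Hive-specific flags back out of a planner. */
static HiveConfPlannerContext hiveContextOf(RelOptPlanner planner) {
  // Calcite's Context.unwrap returns the HiveConfPlannerContext that was packed
  // into the HivePlannerContext above, or null if none was registered.
  return planner.getContext().unwrap(HiveConfPlannerContext.class);
}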
Use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext in project hive by apache.
The class FilterSelectivityEstimator, method visitCall.
@Override
public Double visitCall(RexCall call) {
  if (!deep) {
    return 1.0;
  }
  /*
   * Ignore any predicates on partition columns because we have already
   * accounted for these in the Table row count.
   */
  if (isPartitionPredicate(call, this.childRel)) {
    return 1.0;
  }
  Double selectivity = null;
  SqlKind op = getOp(call);
  switch (op) {
  case AND: {
    selectivity = computeConjunctionSelectivity(call);
    break;
  }
  case OR: {
    selectivity = computeDisjunctionSelectivity(call);
    break;
  }
  case NOT:
  case NOT_EQUALS: {
    selectivity = computeNotEqualitySelectivity(call);
    break;
  }
  case IS_NOT_NULL: {
    if (childRel instanceof HiveTableScan) {
      double noOfNulls = getMaxNulls(call, (HiveTableScan) childRel);
      double totalNoOfTuples = mq.getRowCount(childRel);
      if (totalNoOfTuples >= noOfNulls) {
        selectivity = (totalNoOfTuples - noOfNulls) / Math.max(totalNoOfTuples, 1);
      } else {
        // If we are running explain, we will print the warning in the console
        // and the log files. Otherwise, we just print it in the log files.
        HiveConfPlannerContext ctx =
            childRel.getCluster().getPlanner().getContext().unwrap(HiveConfPlannerContext.class);
        String msg = "Invalid statistics: Number of null values > number of tuples. "
            + "Consider recomputing statistics for table: "
            + ((RelOptHiveTable) childRel.getTable()).getHiveTableMD().getFullyQualifiedName();
        if (ctx.isExplainPlan()) {
          SessionState.getConsole().printError("WARNING: " + msg);
        }
        LOG.warn(msg);
        selectivity = ((double) 1 / (double) 3);
      }
    } else {
      selectivity = computeNotEqualitySelectivity(call);
    }
    break;
  }
  case LESS_THAN_OR_EQUAL:
  case GREATER_THAN_OR_EQUAL:
  case LESS_THAN:
  case GREATER_THAN: {
    // Default guess for range predicates: one third of the rows qualify.
    selectivity = ((double) 1 / (double) 3);
    break;
  }
  case IN: {
    // TODO: 1) check for duplicates 2) We assume in clause values to be
    // present in NDV which may not be correct (Range check can find it) 3) We
    // assume values in NDV set is uniformly distributed over col values
    // (account for skewness - histogram).
    selectivity = computeFunctionSelectivity(call);
    if (selectivity != null) {
      // Scale the single-value selectivity by the number of IN values
      // (operand 0 is the column itself), then clamp to (0, 1].
      selectivity = selectivity * (call.operands.size() - 1);
      if (selectivity <= 0.0) {
        selectivity = 0.10;
      } else if (selectivity >= 1.0) {
        selectivity = 1.0;
      }
    }
    break;
  }
  default:
    selectivity = computeFunctionSelectivity(call);
  }
  return selectivity;
}
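To make the IS_NOT_NULL branch concrete, here is a small self-contained example of the same arithmetic with made-up numbers; in Hive the two inputs come from column statistics (getMaxNulls) and the metadata row count (mq.getRowCount):

public class IsNotNullSelectivityDemo {
  public static void main(String[] args) {
    double totalNoOfTuples = 1000.0; // illustrative; mq.getRowCount(childRel)
    double noOfNulls = 250.0;        // illustrative; getMaxNulls(call, scan)
    double selectivity;
    if (totalNoOfTuples >= noOfNulls) {
      // 750 of 1000 rows survive IS NOT NULL, so selectivity is 0.75.
      selectivity = (totalNoOfTuples - noOfNulls) / Math.max(totalNoOfTuples, 1);
    } else {
      // Inconsistent stats (more nulls than rows): fall back to the 1/3 guess.
      selectivity = 1.0 / 3.0;
    }
    System.out.println("IS NOT NULL selectivity = " + selectivity); // prints 0.75
  }
}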
Use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext in project hive by apache.
The class CalcitePlanner, method createPlanner (newer overload).
private static RelOptPlanner createPlanner(HiveConf conf, Set<RelNode> corrScalarRexSQWithAgg,
    StatsSource statsSource, boolean isExplainPlan) {
  final Double maxSplitSize = (double) HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
  final Double maxMemory =
      (double) HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
  HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
  HiveRulesRegistry registry = new HiveRulesRegistry();
  Properties calciteConfigProperties = new Properties();
  calciteConfigProperties.setProperty(CalciteConnectionProperty.TIME_ZONE.camelName(),
      conf.getLocalTimeZone().getId());
  calciteConfigProperties.setProperty(CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(),
      Boolean.FALSE.toString());
  CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(calciteConfigProperties);
  boolean isCorrelatedColumns = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS);
  boolean heuristicMaterializationStrategy = HiveConf.getVar(conf,
      HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY).equals("heuristic");
  // This overload threads three flags through HiveConfPlannerContext and also
  // attaches a listener that logs rule applications.
  HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig,
      corrScalarRexSQWithAgg,
      new HiveConfPlannerContext(isCorrelatedColumns, heuristicMaterializationStrategy, isExplainPlan),
      statsSource);
  RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext);
  planner.addListener(new RuleEventLogger());
  return planner;
}
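The isExplainPlan flag set here is the one FilterSelectivityEstimator.visitCall consults before echoing bad-statistics warnings to the console. A minimal sketch of that round trip with illustrative flag values; the constructor arity and the isExplainPlan() accessor match the code above, while the flag values themselves are made up:

// Mirrors the three-argument constructor call above.
HiveConfPlannerContext hiveCtx = new HiveConfPlannerContext(
    /* isCorrelatedColumns */ false,
    /* heuristicMaterializationStrategy */ true,
    /* isExplainPlan */ true);
// During selectivity estimation this flag decides whether warnings
// also reach the console rather than only the log files.
if (hiveCtx.isExplainPlan()) {
  System.out.println("WARNING: bad-stats warnings would also be printed to the console");
}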