
Example 21 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From class TestColumnPrunerProcCtx, method testGetSelectNestedColPathsFromChildren7:

// Test select pow(root.col1.b, root.col2) from table test(root
// struct<col1:struct<a:boolean,b:double>, col2:double>);
@Test
public void testGetSelectNestedColPathsFromChildren7() {
    ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null);
    ExprNodeDesc colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false);
    ExprNodeDesc col1 = new ExprNodeFieldDesc(col1Type, colDesc, "col1", false);
    ExprNodeDesc fieldDesc1 = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, col1, "b", false);
    colDesc = new ExprNodeColumnDesc(col3Type, "root", "test", false);
    ExprNodeDesc col2 = new ExprNodeFieldDesc(col2Type, colDesc, "col2", false);
    final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0"));
    GenericUDF udf = mock(GenericUDFPower.class);
    List<ExprNodeDesc> list = new ArrayList<>();
    list.add(fieldDesc1);
    list.add(col2);
    ExprNodeDesc funcDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, udf, "pow", list);
    SelectOperator selectOperator = buildSelectOperator(Arrays.asList(funcDesc), paths);
    List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths);
    compareTestResults(groups, "root.col1.b", "root.col2");
}
Also used : GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Test(org.junit.Test)
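
A side note on the ExprNodeFieldDesc chains above: a nested access such as root.col1.b is encoded as a field descriptor ("b") whose parent is another field descriptor ("col1") whose parent is the column root. Below is a minimal, self-contained sketch of walking such a chain back into a dotted path; it uses hypothetical stand-in classes, not Hive's expression nodes.

// Hypothetical stand-ins for ExprNodeColumnDesc and ExprNodeFieldDesc.
interface Expr {
    String toPath();
}

class ColumnRef implements Expr {
    final String column;
    ColumnRef(String column) { this.column = column; }
    public String toPath() { return column; }
}

class FieldRef implements Expr {
    final Expr parent;
    final String field;
    FieldRef(Expr parent, String field) { this.parent = parent; this.field = field; }
    // Recurse up the chain, then append this field, e.g. "root.col1.b".
    public String toPath() { return parent.toPath() + "." + field; }
}

public class NestedPathDemo {
    public static void main(String[] args) {
        Expr b = new FieldRef(new FieldRef(new ColumnRef("root"), "col1"), "b");
        Expr col2 = new FieldRef(new ColumnRef("root"), "col2");
        System.out.println(b.toPath());    // root.col1.b
        System.out.println(col2.toPath()); // root.col2
    }
}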

Example 22 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From class CorrelationUtilities, method replaceReduceSinkWithSelectOperator:

// Replace the child ReduceSinkOperator (cRS) with a SelectOperator (SEL)
protected static SelectOperator replaceReduceSinkWithSelectOperator(ReduceSinkOperator childRS, ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {
    RowSchema inputRS = childRS.getSchema();
    SelectDesc select = new SelectDesc(childRS.getConf().getValueCols(), childRS.getConf().getOutputValueColumnNames());
    Operator<?> parent = getSingleParent(childRS);
    parent.getChildOperators().clear();
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(inputRS.getSignature()), parent);
    sel.setColumnExprMap(childRS.getColumnExprMap());
    sel.setChildOperators(childRS.getChildOperators());
    for (Operator<? extends Serializable> ch : childRS.getChildOperators()) {
        ch.replaceParent(childRS, sel);
    }
    childRS.setChildOperators(null);
    childRS.setParentOperators(null);
    procCtx.addRemovedOperator(childRS);
    return sel;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
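
The splice performed above follows a generic DAG-rewiring pattern: clear the parent's child list, create the new node under the parent, hand it the old node's children, and repoint each child's parent reference. Here is a self-contained sketch of that pattern with plain placeholder classes (not Hive's Operator API):

import java.util.ArrayList;
import java.util.List;

class Node {
    final String name;
    final List<Node> parents = new ArrayList<>();
    final List<Node> children = new ArrayList<>();
    Node(String name) { this.name = name; }

    // Mirrors Operator.replaceParent in the snippet above: swap oldParent
    // for newParent in place, preserving position.
    void replaceParent(Node oldParent, Node newParent) {
        int i = parents.indexOf(oldParent);
        if (i >= 0) parents.set(i, newParent);
    }
}

public class SpliceDemo {
    public static void main(String[] args) {
        Node parent = new Node("parent");
        Node rs = new Node("RS");
        Node child = new Node("child");
        parent.children.add(rs); rs.parents.add(parent);
        rs.children.add(child);  child.parents.add(rs);

        // Splice: detach RS and put SEL in its place.
        Node sel = new Node("SEL");
        parent.children.clear();
        parent.children.add(sel);
        sel.parents.add(parent);
        sel.children.addAll(rs.children);
        for (Node ch : rs.children) {
            ch.replaceParent(rs, sel);
        }
        rs.children.clear();
        rs.parents.clear();

        System.out.println(parent.children.get(0).name); // SEL
        System.out.println(child.parents.get(0).name);   // SEL
    }
}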

Example 23 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From class SimpleFetchOptimizer, method checkThreshold:

private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception {
    if (limit > 0) {
        if (data.hasOnlyPruningFilter()) {
            /* partitioned table + query has only pruning filters */
            return true;
        } else if (!data.isPartitioned() && !data.isFiltered()) {
            /* unpartitioned table + no filters */
            return true;
        }
    /* fall through */
    }
    long threshold = HiveConf.getLongVar(pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
    if (threshold < 0) {
        return true;
    }
    Operator<?> child = data.scanOp.getChildOperators().get(0);
    if (child instanceof SelectOperator) {
        // SELECT *, constants, and casts can be allowed without a threshold check
        if (checkExpressions((SelectOperator) child)) {
            return true;
        }
    }
    return data.isDataLengthWithInThreshold(pctx, threshold);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ListSinkOperator(org.apache.hadoop.hive.ql.exec.ListSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator)
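
Restated as a standalone decision procedure, the method's order of checks is easier to see. The sketch below uses illustrative parameter names, not Hive's API, and folds the checkExpressions call into a single boolean:

public class FetchThresholdDemo {
    // Mirrors checkThreshold's decision order: limit-based short-circuits
    // first, then the configured size threshold (< 0 disables the check),
    // then the trivial-select escape hatch, and finally the data-size test.
    static boolean convertToFetch(int limit, boolean pruningOnly,
            boolean partitioned, boolean filtered, long threshold,
            boolean trivialSelect, long dataLength) {
        if (limit > 0) {
            if (pruningOnly) return true;               // partitioned table + pruning filters only
            if (!partitioned && !filtered) return true; // unpartitioned table + no filters
        }
        if (threshold < 0) return true;                 // threshold check disabled
        if (trivialSelect) return true;                 // select *, constants, casts
        return dataLength <= threshold;
    }

    public static void main(String[] args) {
        System.out.println(convertToFetch(10, false, false, false, 1024, false, 9999)); // true
        System.out.println(convertToFetch(0, false, true, true, 1024, false, 9999));    // false
    }
}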

Example 24 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From class HiveOpConverter, method visit:

OpAttr visit(HiveProject projectRel) throws SemanticException {
    OpAttr inputOpAf = dispatch(projectRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + projectRel.getId() + ":" + projectRel.getRelTypeName() + " with row type: [" + projectRel.getRowType() + "]");
    }
    WindowingSpec windowingSpec = new WindowingSpec();
    List<String> exprNames = new ArrayList<String>(projectRel.getRowType().getFieldNames());
    List<ExprNodeDesc> exprCols = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int pos = 0; pos < projectRel.getChildExps().size(); pos++) {
        ExprNodeConverter converter = new ExprNodeConverter(inputOpAf.tabAlias, projectRel.getRowType().getFieldNames().get(pos), projectRel.getInput().getRowType(), projectRel.getRowType(), inputOpAf.vcolsInCalcite, projectRel.getCluster().getTypeFactory(), true);
        ExprNodeDesc exprCol = projectRel.getChildExps().get(pos).accept(converter);
        colExprMap.put(exprNames.get(pos), exprCol);
        exprCols.add(exprCol);
        // TODO: should columns that come through a PTF retain their virtual-column-ness?
        if (converter.getWindowFunctionSpec() != null) {
            for (WindowFunctionSpec wfs : converter.getWindowFunctionSpec()) {
                windowingSpec.addWindowFunction(wfs);
            }
        }
    }
    if (windowingSpec.getWindowExpressions() != null && !windowingSpec.getWindowExpressions().isEmpty()) {
        inputOpAf = genPTF(inputOpAf, windowingSpec);
    }
    // TODO: is this a safe assumption (name collision, external names...)
    SelectDesc sd = new SelectDesc(exprCols, exprNames);
    Pair<ArrayList<ColumnInfo>, Set<Integer>> colInfoVColPair = createColInfos(projectRel.getChildExps(), exprCols, exprNames, inputOpAf);
    SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(sd, new RowSchema(colInfoVColPair.getKey()), inputOpAf.inputs.get(0));
    selOp.setColumnExprMap(colExprMap);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + selOp + " with row schema: [" + selOp.getSchema() + "]");
    }
    return new OpAttr(inputOpAf.tabAlias, colInfoVColPair.getValue(), selOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) WindowingSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec) WindowFunctionSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
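
One control-flow detail worth isolating: window function specs are accumulated per projected expression, and genPTF is invoked only if at least one was found. A small sketch of that guard with hypothetical stand-in types (Hive's ExprNodeConverter and WindowingSpec are not used here):

import java.util.ArrayList;
import java.util.List;

public class WindowingCheckDemo {
    // Stand-in for ExprNodeConverter.getWindowFunctionSpec(): returns the
    // window functions an expression used, or an empty list for a plain
    // projection.
    static List<String> windowFunctionsOf(String expr) {
        return expr.contains("over") ? List.of(expr) : List.of();
    }

    public static void main(String[] args) {
        List<String> collected = new ArrayList<>();
        for (String expr : List.of("salary * 1.1", "rank() over (order by salary)")) {
            collected.addAll(windowFunctionsOf(expr));
        }
        // Mirrors the guard above: only splice in a PTF when window
        // functions were actually collected.
        if (!collected.isEmpty()) {
            System.out.println("insert PTF before SEL for: " + collected);
        }
    }
}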

Example 25 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From class DynamicPartitionPruningOptimization, method generateEventOperatorPlan:

private void generateEventOperatorPlan(DynamicListContext ctx, ParseContext parseContext, TableScanOperator ts, String column, String columnType) {
    // we will put a fork in the plan at the source of the reduce sink
    Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
    // we need the expr that generated the key of the reduce sink
    ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
    // we also need the expr for the partitioned table
    ExprNodeDesc partKey = ctx.parent.getChildren().get(0);
    if (LOG.isDebugEnabled()) {
        LOG.debug("key expr: " + key);
        LOG.debug("partition key expr: " + partKey);
    }
    List<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
    keyExprs.add(key);
    // GroupByDesc requires a concrete ArrayList here, not just a List
    ArrayList<String> outputNames = new ArrayList<String>();
    outputNames.add(HiveConf.getColumnInternalName(0));
    // project the relevant key column
    SelectDesc select = new SelectDesc(keyExprs, outputNames);
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(select, parentOfRS);
    // do a group by on the list to dedup
    float groupByMemoryUsage = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    ArrayList<ExprNodeDesc> groupByExprs = new ArrayList<ExprNodeDesc>();
    ExprNodeDesc groupByExpr = new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), null, false);
    groupByExprs.add(groupByExpr);
    GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, outputNames, groupByExprs, new ArrayList<AggregationDesc>(), false, groupByMemoryUsage, memoryThreshold, null, false, -1, true);
    GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, selectOp);
    Map<String, ExprNodeDesc> colMap = new HashMap<String, ExprNodeDesc>();
    colMap.put(outputNames.get(0), groupByExpr);
    groupByOp.setColumnExprMap(colMap);
    // finally add the event broadcast operator
    if (HiveConf.getVar(parseContext.getConf(), ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        DynamicPruningEventDesc eventDesc = new DynamicPruningEventDesc();
        eventDesc.setTableScan(ts);
        eventDesc.setGenerator(ctx.generator);
        eventDesc.setTable(PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(keyExprs, "key")));
        eventDesc.setTargetColumnName(column);
        eventDesc.setTargetColumnType(columnType);
        eventDesc.setPartKey(partKey);
        OperatorFactory.getAndMakeChild(eventDesc, groupByOp);
    } else {
        // Must be spark branch
        SparkPartitionPruningSinkDesc desc = new SparkPartitionPruningSinkDesc();
        desc.setTable(PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(keyExprs, "key")));
        desc.addTarget(column, columnType, partKey, null, ts);
        SparkPartitionPruningSinkOperator dppSink = (SparkPartitionPruningSinkOperator) OperatorFactory.getAndMakeChild(desc, groupByOp);
        if (HiveConf.getBoolVar(parseContext.getConf(), ConfVars.HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION)) {
            mayReuseExistingDPPSink(parentOfRS, Arrays.asList(selectOp, groupByOp, dppSink));
        }
    }
}
Also used : GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator)
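
The plan fragment built above amounts to: project the partition key (SEL), dedup it with a hash group-by (GBY), and hand the distinct keys to an event or pruning sink that broadcasts them to the target table scan. A runnable sketch of that data flow in plain Java, with collections standing in for operators:

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class DppPipelineDemo {
    public static void main(String[] args) {
        // Rows coming out of parentOfRS; only the partition key column matters.
        List<Map<String, String>> rows = List.of(
                Map.of("ds", "2024-01-01", "v", "1"),
                Map.of("ds", "2024-01-01", "v", "2"),
                Map.of("ds", "2024-01-02", "v", "3"));
        // SEL: project the relevant key column.
        // GBY (hash mode): dedup the projected keys.
        Set<String> keys = new LinkedHashSet<>();
        for (Map<String, String> row : rows) {
            keys.add(row.get("ds"));
        }
        // Event/pruning sink: the distinct keys are what gets broadcast
        // to prune partitions at the target TableScan.
        System.out.println(keys); // [2024-01-01, 2024-01-02]
    }
}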

Aggregations

SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 31 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 20 usages
ArrayList (java.util.ArrayList): 14 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 13 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 12 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 11 usages
SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 10 usages
HashMap (java.util.HashMap): 8 usages
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 8 usages
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 8 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 8 usages
LinkedHashMap (java.util.LinkedHashMap): 7 usages
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 7 usages
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 7 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 7 usages
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 7 usages
Test (org.junit.Test): 7 usages
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 6 usages
ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc): 6 usages
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 4 usages