Example 6 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From class ColumnPrunerProcCtx, method getColsFromSelectExpr:

/**
 * Creates the list of internal column names (represented by field nodes)
 * from select expressions in a select operator. This function is used for the
 * select operator instead of the genColLists function (which is used by
 * the rest of the operators).
 *
 * @param op The select operator.
 * @return a list of field nodes representing the internal column names.
 */
public List<FieldNode> getColsFromSelectExpr(SelectOperator op) {
    List<FieldNode> cols = new ArrayList<>();
    SelectDesc conf = op.getConf();
    if (conf.isSelStarNoCompute()) {
        for (ColumnInfo colInfo : op.getSchema().getSignature()) {
            cols.add(new FieldNode(colInfo.getInternalName()));
        }
    } else {
        List<ExprNodeDesc> exprList = conf.getColList();
        for (ExprNodeDesc expr : exprList) {
            cols = mergeFieldNodesWithDesc(cols, expr);
        }
    }
    return cols;
}
Also used: ArrayList (java.util.ArrayList), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
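
A minimal sketch of the two SelectDesc configurations this method distinguishes, assuming Hive's ql and serde2 modules are on the classpath (the column name "_col0" and table alias "t" are made up for illustration; the boolean constructor is assumed from the isSelStarNoCompute flag seen above):

import java.util.ArrayList;
import java.util.Arrays;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SelectDescBranches {
    public static void main(String[] args) {
        // Branch 1: SELECT * with no computation. The descriptor carries no
        // expression list, so getColsFromSelectExpr falls back to the
        // operator's schema to enumerate columns.
        SelectDesc starDesc = new SelectDesc(true);
        System.out.println(starDesc.isSelStarNoCompute()); // true

        // Branch 2: an explicit column list. getColsFromSelectExpr walks
        // each expression and merges the columns it references.
        ExprNodeDesc col = new ExprNodeColumnDesc(
                TypeInfoFactory.stringTypeInfo, "_col0", "t", false);
        SelectDesc listDesc = new SelectDesc(
                new ArrayList<ExprNodeDesc>(Arrays.asList(col)),
                new ArrayList<String>(Arrays.asList("_col0")));
        System.out.println(listDesc.getColList().size()); // 1
    }
}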

Example 7 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From class MapJoinProcessor, method genSelectPlan:

protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
    List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
    input.setChildOperators(null);
    // Create a dummy select operator; the walker needs it to split the
    // map join later on.
    RowSchema inputRS = input.getSchema();
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputs = new ArrayList<String>();
    List<String> outputCols = input.getConf().getOutputColumnNames();
    ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < outputCols.size(); i++) {
        String internalName = outputCols.get(i);
        ColumnInfo valueInfo = inputRS.getColumnInfo(internalName);
        ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
        exprs.add(colDesc);
        outputs.add(internalName);
        ColumnInfo newCol = new ColumnInfo(internalName, valueInfo.getType(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol());
        newCol.setAlias(valueInfo.getAlias());
        outputRS.add(newCol);
        colExprMap.put(internalName, colDesc);
    }
    SelectDesc select = new SelectDesc(exprs, outputs, false);
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), input);
    sel.setColumnExprMap(colExprMap);
    // Insert the select operator in between.
    sel.setChildOperators(childOps);
    for (Operator<? extends OperatorDesc> ch : childOps) {
        ch.replaceParent(input, sel);
    }
}
Also used: LateralViewJoinOperator (org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator), GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator), UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator), JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), ScriptOperator (org.apache.hadoop.hive.ql.exec.ScriptOperator), RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), ArrayList (java.util.ArrayList), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)
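
The loop above is an identity projection: one ExprNodeColumnDesc per output column, with the expression list, output-name list, and column-expression map kept in lockstep. A standalone sketch of that pattern (the column names, table alias, and string type are placeholders; in MapJoinProcessor they come from the join's row schema):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class IdentitySelectSketch {
    public static void main(String[] args) {
        ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
        ArrayList<String> outputs = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        for (String internalName : Arrays.asList("_col0", "_col1")) {
            // Identity column reference: the output column is the input
            // column under the same internal name.
            ExprNodeDesc colDesc = new ExprNodeColumnDesc(
                    TypeInfoFactory.stringTypeInfo, internalName, "t", false);
            exprs.add(colDesc);
            outputs.add(internalName);
            colExprMap.put(internalName, colDesc);
        }
        // selectStar == false: an explicit, if trivial, projection.
        SelectDesc select = new SelectDesc(exprs, outputs, false);
        System.out.println(select.getColList().size()); // 2
    }
}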

Example 8 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From class CalcitePlanner, method handleInsertStatement:

// This method is a wrapper around handleInsertStatementSpec in
// SemanticAnalyzer.
Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb) throws SemanticException {
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    List<ColumnInfo> columns = inputRR.getColumnInfos();
    for (int i = 0; i < columns.size(); i++) {
        ColumnInfo col = columns.get(i);
        colList.add(new ExprNodeColumnDesc(col));
    }
    ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
    RowResolver rowResolver = createRowResolver(columns);
    rowResolver = handleInsertStatementSpec(colList, dest, rowResolver, qb, selExprList);
    List<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < colList.size(); i++) {
        String outputCol = getColumnInternalName(i);
        colExprMap.put(outputCol, colList.get(i));
        columnNames.add(outputCol);
    }
    Operator<?> output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, columnNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
    output.setColumnExprMap(colExprMap);
    return output;
}
Also used: RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), LinkedHashMap (java.util.LinkedHashMap), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)
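
getColumnInternalName assigns positional output names; to my understanding it delegates to HiveConf.getColumnInternalName, which yields names of the form _col<pos>. A small sketch of the renaming loop in isolation (the input expressions here are placeholders, not taken from any real row resolver):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class InternalNameSketch {
    public static void main(String[] args) {
        List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
        colList.add(new ExprNodeColumnDesc(
                TypeInfoFactory.stringTypeInfo, "name", "src", false));
        colList.add(new ExprNodeColumnDesc(
                TypeInfoFactory.intTypeInfo, "age", "src", false));

        List<String> columnNames = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        for (int i = 0; i < colList.size(); i++) {
            // Positional internal name, e.g. "_col0", "_col1".
            String outputCol = HiveConf.getColumnInternalName(i);
            colExprMap.put(outputCol, colList.get(i));
            columnNames.add(outputCol);
        }
        System.out.println(columnNames); // [_col0, _col1]
    }
}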

Example 9 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From class ParallelEdgeFixer, method fixParallelEdge:

/**
 * Fixes a parallel edge going into a mapjoin by introducing a concentrator RS.
 */
private void fixParallelEdge(Operator<? extends OperatorDesc> p, Operator<?> o) throws SemanticException {
    LOG.info("Fixing parallel by adding a concentrator RS between {} -> {}", p, o);
    ReduceSinkDesc conf = (ReduceSinkDesc) p.getConf();
    ReduceSinkDesc newConf = (ReduceSinkDesc) conf.clone();
    Operator<SelectDesc> newSEL = buildSEL(p, conf);
    Operator<ReduceSinkDesc> newRS = OperatorFactory.getAndMakeChild(p.getCompilationOpContext(), newConf, new ArrayList<>());
    conf.setOutputName("forward_to_" + newRS);
    conf.setTag(0);
    newConf.setKeyCols(new ArrayList<>(conf.getKeyCols()));
    newRS.setSchema(new RowSchema(p.getSchema()));
    p.replaceChild(o, newSEL);
    newSEL.setParentOperators(Lists.<Operator<?>>newArrayList(p));
    newSEL.setChildOperators(Lists.<Operator<?>>newArrayList(newRS));
    newRS.setParentOperators(Lists.<Operator<?>>newArrayList(newSEL));
    newRS.setChildOperators(Lists.<Operator<?>>newArrayList(o));
    o.replaceParent(p, newRS);
}
Also used: RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
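
The wiring is the subtle part: Hive's operator DAG stores each edge on both endpoints, so splicing SEL -> RS between p and o means updating p's child list, o's parent list, and both lists of each new operator. A toy sketch of the same double-linked insertion (plain Java, deliberately not using Hive's Operator class):

import java.util.ArrayList;
import java.util.List;

public class ConcentratorWiring {
    // Toy stand-in for an operator: edges live on both endpoints.
    static class Node {
        final String name;
        final List<Node> parents = new ArrayList<Node>();
        final List<Node> children = new ArrayList<Node>();
        Node(String name) { this.name = name; }
        void replaceChild(Node oldChild, Node newChild) {
            children.set(children.indexOf(oldChild), newChild);
        }
        void replaceParent(Node oldParent, Node newParent) {
            parents.set(parents.indexOf(oldParent), newParent);
        }
    }

    public static void main(String[] args) {
        Node p = new Node("RS");
        Node o = new Node("MAPJOIN");
        p.children.add(o);
        o.parents.add(p);

        // Splice in SEL -> RS_concentrator between p and o, mirroring
        // the six wiring statements in fixParallelEdge.
        Node sel = new Node("SEL");
        Node rs = new Node("RS_concentrator");
        p.replaceChild(o, sel);   // forward edge: p -> sel
        sel.parents.add(p);
        sel.children.add(rs);     // sel -> rs
        rs.parents.add(sel);
        rs.children.add(o);       // rs -> o
        o.replaceParent(p, rs);   // back edge: o's parent is now rs

        System.out.println(p.children.get(0).name); // SEL
        System.out.println(o.parents.get(0).name);  // RS_concentrator
    }
}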

Example 10 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From class HiveProjectVisitor, method visit:

@Override
OpAttr visit(HiveProject projectRel) throws SemanticException {
    OpAttr inputOpAf = hiveOpConverter.dispatch(projectRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + projectRel.getId() + ":" + projectRel.getRelTypeName() + " with row type: [" + projectRel.getRowType() + "]");
    }
    WindowingSpec windowingSpec = new WindowingSpec();
    List<String> exprNames = new ArrayList<String>(projectRel.getRowType().getFieldNames());
    List<ExprNodeDesc> exprCols = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int pos = 0; pos < projectRel.getProjects().size(); pos++) {
        ExprNodeConverter converter = new ExprNodeConverter(inputOpAf.tabAlias, projectRel.getRowType().getFieldNames().get(pos), projectRel.getInput().getRowType(), projectRel.getRowType(), inputOpAf.vcolsInCalcite, projectRel.getCluster().getTypeFactory(), true);
        ExprNodeDesc exprCol = projectRel.getProjects().get(pos).accept(converter);
        colExprMap.put(exprNames.get(pos), exprCol);
        exprCols.add(exprCol);
        // TODO: should columns that come through a PTF retain their virtual-column property?
        if (converter.getWindowFunctionSpec() != null) {
            for (WindowFunctionSpec wfs : converter.getWindowFunctionSpec()) {
                windowingSpec.addWindowFunction(wfs);
            }
        }
    }
    if (windowingSpec.getWindowExpressions() != null && !windowingSpec.getWindowExpressions().isEmpty()) {
        inputOpAf = genPTF(inputOpAf, windowingSpec);
    }
    // TODO: is this a safe assumption (name collision, external names...)
    SelectDesc sd = new SelectDesc(exprCols, exprNames);
    Pair<ArrayList<ColumnInfo>, Set<Integer>> colInfoVColPair = createColInfos(projectRel.getProjects(), exprCols, exprNames, inputOpAf);
    SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(sd, new RowSchema(colInfoVColPair.getKey()), inputOpAf.inputs.get(0));
    selOp.setColumnExprMap(colExprMap);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + selOp + " with row schema: [" + selOp.getSchema() + "]");
    }
    return new OpAttr(inputOpAf.tabAlias, colInfoVColPair.getValue(), selOp);
}
Also used: RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), HashSet (java.util.HashSet), Set (java.util.Set), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr), WindowingSpec (org.apache.hadoop.hive.ql.parse.WindowingSpec), WindowFunctionSpec (org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec), ExprNodeConverter (org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)
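
The TODO about name collisions can be made concrete: exprNames, exprCols, and colExprMap must stay in lockstep, and a duplicate output name would silently overwrite a colExprMap entry. A defensive sketch of that invariant (hypothetical helper; nothing like it appears in the Hive source shown here):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public final class ProjectionChecks {
    private ProjectionChecks() {
    }

    // Hypothetical helper enforcing the lockstep invariant that
    // HiveProjectVisitor.visit relies on: exactly one expression per
    // output name, with no duplicate names clobbering map entries.
    static Map<String, ExprNodeDesc> buildColExprMap(
            List<String> names, List<ExprNodeDesc> exprs) {
        if (names.size() != exprs.size()) {
            throw new IllegalArgumentException("names/exprs size mismatch");
        }
        Map<String, ExprNodeDesc> map = new HashMap<String, ExprNodeDesc>();
        for (int i = 0; i < names.size(); i++) {
            if (map.put(names.get(i), exprs.get(i)) != null) {
                throw new IllegalArgumentException(
                        "duplicate output column: " + names.get(i));
            }
        }
        return map;
    }
}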

Aggregations

SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 55 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 50 usages
ArrayList (java.util.ArrayList): 43 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 32 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 31 usages
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 30 usages
HashMap (java.util.HashMap): 28 usages
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 25 usages
LinkedHashMap (java.util.LinkedHashMap): 20 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 16 usages
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 15 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 13 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 13 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 13 usages
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 13 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13 usages
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 13 usages
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 13 usages
ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 13 usages
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 12 usages