Search in sources:

Example 31 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

In the class TableAccessAnalyzer, method genColNameMap.

/**
 * Rewrites a subset of an operator's output column names into the input
 * column names those outputs are derived from.
 *
 * <p>Only {@link SelectOperator}s whose descriptor is not a "select star, no
 * compute" are remapped; for every other operator the list is left as is and
 * the method returns {@code true}. For a remapped select, each requested name
 * is looked up among the select's output column names:
 * <ul>
 *   <li>a name the select does not produce is kept unchanged;</li>
 *   <li>a name bound to a constant is dropped from the list;</li>
 *   <li>a name bound to a plain column reference is replaced by that
 *       column's name;</li>
 *   <li>a name bound to any other expression makes the mapping non-simple
 *       and the method returns {@code false}.</li>
 * </ul>
 *
 * <p>Every requested name is resolved against a snapshot of the incoming
 * list. Renaming in place while scanning the select columns would let a name
 * written for one column be picked up again by a later column with the same
 * output name (for example {@code SELECT b AS a, a AS b}), which maps the key
 * twice and produces the wrong input column.
 *
 * @param op operator whose outputs are being traced back to its inputs
 * @param currColNames output column names of interest; on a {@code true}
 *          return it holds the corresponding input column names, on a
 *          {@code false} return it is left unmodified
 * @return {@code true} if the mapping is simple, {@code false} otherwise
 */
private static boolean genColNameMap(Operator<? extends OperatorDesc> op, List<String> currColNames) {
    assert (op.columnNamesRowResolvedCanBeObtained());
    if (!(op instanceof SelectOperator)) {
        return true;
    }
    SelectDesc selectDesc = ((SelectOperator) op).getConf();
    if (selectDesc.isSelStarNoCompute()) {
        return true;
    }
    List<ExprNodeDesc> colList = selectDesc.getColList();
    List<String> outputColNames = selectDesc.getOutputColumnNames();

    // Work from a snapshot so that a freshly written input name can never be
    // mistaken for a requested output name.
    final String[] requested = currColNames.toArray(new String[0]);
    final String[] mapped = new String[requested.length];
    int kept = 0;
    for (String requestedName : requested) {
        int pos = outputColNames.indexOf(requestedName);
        if (pos < 0 || pos >= colList.size()) {
            // Not produced by this select: nothing to translate.
            mapped[kept++] = requestedName;
            continue;
        }
        ExprNodeDesc colExpr = colList.get(pos);
        if (colExpr instanceof ExprNodeConstantDesc) {
            // Constants have no input column; the name simply disappears.
            continue;
        }
        if (!(colExpr instanceof ExprNodeColumnDesc)) {
            // Only columns and constants can be selected; the column map
            // can not be generated.
            return false;
        }
        mapped[kept++] = ((ExprNodeColumnDesc) colExpr).getColumn();
    }

    // Publish the result, touching the caller's list only where it changes.
    for (int i = 0; i < kept; i++) {
        String current = currColNames.get(i);
        boolean same = (mapped[i] == null) ? (current == null) : mapped[i].equals(current);
        if (!same) {
            currColNames.set(i, mapped[i]);
        }
    }
    if (kept < currColNames.size()) {
        currColNames.subList(kept, currColNames.size()).clear();
    }
    return true;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 32 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

In the class SemanticAnalyzer, method insertSelectForSemijoin.

/**
 * Builds the selection operator used for a semijoin: it projects exactly the
 * given fields from the input operator, giving each one an internal column
 * name derived from its position in {@code fields}.
 *
 * @param fields
 *          AST nodes of the fields that must be output
 * @param input
 *          operator whose rows are projected
 * @return the new selection operator, created as a child of {@code input}
 * @throws SemanticException
 *           declared by this method; presumably raised while generating the
 *           expression for a field
 */
private Operator insertSelectForSemijoin(ArrayList<ASTNode> fields, Operator input) throws SemanticException {
    final RowResolver sourceResolver = opParseCtx.get(input).getRowResolver();
    final RowResolver projectedResolver = new RowResolver();
    final ArrayList<ExprNodeDesc> projections = new ArrayList<ExprNodeDesc>();
    final ArrayList<String> internalNames = new ArrayList<String>();
    final Map<String, ExprNodeDesc> exprByName = new HashMap<String, ExprNodeDesc>();

    // One projected column per requested field, named by its position.
    int position = 0;
    for (ASTNode field : fields) {
        final ExprNodeDesc projection = genExprNodeDesc(field, sourceResolver);
        final String internalName = getColumnInternalName(position);
        position++;

        internalNames.add(internalName);
        projectedResolver.putExpression(field, new ColumnInfo(internalName, projection.getTypeInfo(), "", false));
        projections.add(projection);
        exprByName.put(internalName, projection);
    }

    // Create the select operator under the input and register its resolver.
    final SelectDesc selectDesc = new SelectDesc(projections, internalNames, false);
    final RowSchema selectSchema = new RowSchema(projectedResolver.getColumnInfos());
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(selectDesc, selectSchema, input), projectedResolver);
    output.setColumnExprMap(exprByName);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 33 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

In the class HiveOpConverter, method genInputSelectForUnion.

/**
 * Aligns one input of a union with the union's column layout.
 *
 * <p>The input's schema columns are paired positionally with
 * {@code uColumnInfo}. Each input column becomes a column reference, wrapped
 * in a conversion cast when its type differs from the union column's type,
 * and is exposed under the union column's internal name. A select operator
 * carrying those expressions is created only when at least one pair is
 * reported as different by {@code isSameColumnForRR}; otherwise the original
 * operator is returned unchanged.
 *
 * <p>Note: {@code uColumnInfo} is advanced once per input column without a
 * {@code hasNext} check, and the union column type is cast to
 * {@code PrimitiveTypeInfo} whenever a conversion is needed.
 *
 * @param origInputOp the union input to adapt
 * @param uColumnInfo column infos of the union's output
 * @return a new select operator on top of {@code origInputOp}, or
 *         {@code origInputOp} itself when no select is required
 * @throws SemanticException declared by this method; presumably raised while
 *           building a conversion cast
 */
private Operator<? extends OperatorDesc> genInputSelectForUnion(Operator<? extends OperatorDesc> origInputOp, ArrayList<ColumnInfo> uColumnInfo) throws SemanticException {
    final Iterator<ColumnInfo> inputCols = origInputOp.getSchema().getSignature().iterator();
    final Iterator<ColumnInfo> unionCols = uColumnInfo.iterator();
    final List<ExprNodeDesc> projections = new ArrayList<ExprNodeDesc>();
    final List<String> projectedNames = new ArrayList<String>();
    final Map<String, ExprNodeDesc> exprByName = new HashMap<String, ExprNodeDesc>();

    boolean selectRequired = false;
    for (; inputCols.hasNext(); ) {
        final ColumnInfo inputCol = inputCols.next();
        final ColumnInfo unionCol = unionCols.next();
        selectRequired |= !inputCol.isSameColumnForRR(unionCol);

        ExprNodeDesc projection = new ExprNodeColumnDesc(inputCol.getType(), inputCol.getInternalName(), inputCol.getTabAlias(), inputCol.getIsVirtualCol(), inputCol.isSkewedCol());
        final boolean sameType = inputCol.getType().equals(unionCol.getType());
        if (!sameType) {
            // Bring the input column to the type the union expects.
            projection = ParseUtils.createConversionCast(projection, (PrimitiveTypeInfo) unionCol.getType());
        }

        projections.add(projection);
        projectedNames.add(unionCol.getInternalName());
        exprByName.put(unionCol.getInternalName(), projection);
    }

    if (!selectRequired) {
        return origInputOp;
    }
    return OperatorFactory.getAndMakeChild(new SelectDesc(projections, projectedNames), new RowSchema(uColumnInfo), exprByName, origInputOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 34 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

In the class Vectorizer, method vectorizeSelectOperator.

/**
 * Produces the vectorized counterpart of a select operator.
 *
 * <p>A fresh {@code VectorSelectDesc} is attached to the operator's
 * {@code SelectDesc}. Every expression in the select's column list is turned
 * into a {@code VectorExpression}; its output column is always recorded in
 * the projected-output array, but the expression itself is kept for
 * evaluation only when it is not an {@code IdentityExpression}.
 *
 * @param selectOp operator whose configuration is a {@code SelectDesc}
 * @param vContext context used to obtain the vector expressions and passed
 *          on to the operator factory
 * @return the operator returned by {@code OperatorFactory.getVectorOperator}
 * @throws HiveException declared by this method; presumably raised while
 *           building vector expressions or the vector operator
 */
public static Operator<? extends OperatorDesc> vectorizeSelectOperator(Operator<? extends OperatorDesc> selectOp, VectorizationContext vContext) throws HiveException {
    final SelectDesc desc = (SelectDesc) selectOp.getConf();
    final VectorSelectDesc vectorDesc = new VectorSelectDesc();
    desc.setVectorDesc(vectorDesc);

    final List<ExprNodeDesc> projections = desc.getColList();
    final int total = projections.size();
    final int[] outputColumns = new int[total];
    VectorExpression[] evaluated = new VectorExpression[total];
    int evaluatedCount = 0;

    for (int slot = 0; slot < total; slot++) {
        final VectorExpression vectorExpr = vContext.getVectorExpression(projections.get(slot));
        outputColumns[slot] = vectorExpr.getOutputColumn();
        // Identity expressions only pass a column through, so evaluating
        // them would be useless work.
        if (!(vectorExpr instanceof IdentityExpression)) {
            evaluated[evaluatedCount] = vectorExpr;
            evaluatedCount++;
        }
    }

    // Trim the unused tail left behind by skipped identity expressions.
    if (evaluatedCount < total) {
        evaluated = Arrays.copyOf(evaluated, evaluatedCount);
    }

    vectorDesc.setSelectExpressions(evaluated);
    vectorDesc.setProjectedOutputColumns(outputColumns);
    return OperatorFactory.getVectorOperator(selectOp.getCompilationOpContext(), desc, vContext);
}
Also used : VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IdentityExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression)

Example 35 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

In the class HiveOpConverter, method visit.

/**
 * Translates a {@code HiveProject} into a select operator.
 *
 * <p>The project's input is dispatched first. Each child expression of the
 * project is then converted to an {@code ExprNodeDesc} and registered under
 * the matching field name of the project's row type; window function specs
 * reported by the converter are accumulated. If any window expressions were
 * collected, a PTF step is generated over the input before the select
 * operator is created on top of the first input operator.
 *
 * @param projectRel the project relational node to translate
 * @return an {@code OpAttr} carrying the input's table alias, the second
 *         element of the pair returned by {@code createColInfos}, and the
 *         new select operator
 * @throws SemanticException declared by this method; presumably raised by
 *           the dispatch, PTF or column-info steps
 */
OpAttr visit(HiveProject projectRel) throws SemanticException {
    OpAttr inputAttr = dispatch(projectRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + projectRel.getId() + ":" + projectRel.getRelTypeName() + " with row type: [" + projectRel.getRowType() + "]");
    }

    final WindowingSpec windowing = new WindowingSpec();
    final List<String> fieldNames = new ArrayList<String>(projectRel.getRowType().getFieldNames());
    final List<ExprNodeDesc> projections = new ArrayList<ExprNodeDesc>();
    final Map<String, ExprNodeDesc> exprByName = new HashMap<String, ExprNodeDesc>();

    for (int pos = 0; pos < projectRel.getChildExps().size(); pos++) {
        final ExprNodeConverter converter = new ExprNodeConverter(inputAttr.tabAlias, projectRel.getRowType().getFieldNames().get(pos), projectRel.getInput().getRowType(), projectRel.getRowType(), inputAttr.vcolsInCalcite, projectRel.getCluster().getTypeFactory(), true);
        final ExprNodeDesc projection = projectRel.getChildExps().get(pos).accept(converter);
        exprByName.put(fieldNames.get(pos), projection);
        projections.add(projection);

        // TODO: Cols that come through PTF should it retain (VirtualColumness)?
        if (converter.getWindowFunctionSpec() == null) {
            continue;
        }
        for (WindowFunctionSpec windowFunction : converter.getWindowFunctionSpec()) {
            windowing.addWindowFunction(windowFunction);
        }
    }

    final boolean hasWindowExpressions = windowing.getWindowExpressions() != null && !windowing.getWindowExpressions().isEmpty();
    if (hasWindowExpressions) {
        inputAttr = genPTF(inputAttr, windowing);
    }

    // TODO: is this a safe assumption (name collision, external names...)
    final SelectDesc selectDesc = new SelectDesc(projections, fieldNames);
    final Pair<ArrayList<ColumnInfo>, Set<Integer>> colInfosAndVCols = createColInfos(projectRel.getChildExps(), projections, fieldNames, inputAttr);
    final SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, new RowSchema(colInfosAndVCols.getKey()), inputAttr.inputs.get(0));
    selectOp.setColumnExprMap(exprByName);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + selectOp + " with row schema: [" + selectOp.getSchema() + "]");
    }
    return new OpAttr(inputAttr.tabAlias, colInfosAndVCols.getValue(), selectOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) WindowingSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec) WindowFunctionSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Aggregations

SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)55 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)50 ArrayList (java.util.ArrayList)43 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)32 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)31 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)30 HashMap (java.util.HashMap)28 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)25 LinkedHashMap (java.util.LinkedHashMap)20 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)16 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)15 SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint)13 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)13 SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint)13 SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint)13 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)13 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)13 DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint)13 ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)13 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)12