
Example 16 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From the class SemanticAnalyzer, the method genSelectAllDesc:

private Operator genSelectAllDesc(Operator input) {
    OpParseContext inputCtx = opParseCtx.get(input);
    RowResolver inputRR = inputCtx.getRowResolver();
    List<ColumnInfo> columns = inputRR.getColumnInfos();
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    List<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
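    // Build an identity column expression for each input column and register it
    // under the column's internal name.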
    for (ColumnInfo col : columns) {
        colList.add(new ExprNodeColumnDesc(col, true));
        columnNames.add(col.getInternalName());
        columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col, true));
    }
    RowResolver outputRR = inputRR.duplicate();
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, columnNames, true), outputRR.getRowSchema(), input), outputRR);
    output.setColumnExprMap(columnExprMap);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
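
The pattern above comes down to a SelectDesc whose third constructor argument flags a SELECT * projection. Below is a minimal standalone sketch using only constructors that appear in the example; the single string column "_col0" with table alias "t" is invented for illustration:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SelectAllDescSketch {
    public static void main(String[] args) {
        // Hypothetical input column: internal name "_col0", string type, alias "t", not virtual.
        ColumnInfo col = new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false);
        List<ExprNodeDesc> colList = new ArrayList<>();
        List<String> colNames = new ArrayList<>();
        // Identity expression over the column, as genSelectAllDesc does for each input column.
        colList.add(new ExprNodeColumnDesc(col, true));
        colNames.add(col.getInternalName());
        // The trailing 'true' marks the descriptor as a SELECT * projection.
        SelectDesc selStar = new SelectDesc(colList, colNames, true);
        System.out.println(colNames + " -> " + colList.size() + " identity expression(s)");
    }
}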

Example 17 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From the class SemanticAnalyzer, the method genLateralViewPlan:

private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) throws SemanticException {
    RowResolver lvForwardRR = new RowResolver();
    RowResolver source = opParseCtx.get(op).getRowResolver();
    Map<String, ExprNodeDesc> lvfColExprMap = new HashMap<String, ExprNodeDesc>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    List<String> colNames = new ArrayList<String>();
    for (ColumnInfo col : source.getColumnInfos()) {
        String[] tabCol = source.reverseLookup(col.getInternalName());
        lvForwardRR.put(tabCol[0], tabCol[1], col);
        ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(col);
        colList.add(colExpr);
        colNames.add(colExpr.getColumn());
        lvfColExprMap.put(col.getInternalName(), colExpr);
        selColExprMap.put(col.getInternalName(), colExpr.clone());
    }
    Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()), op), lvForwardRR);
    lvForward.setColumnExprMap(lvfColExprMap);
    // The order in which the two paths are added is important. The
    // lateral view join operator depends on having the select operator
    // give it the row first.
    // Build the "all" path by creating a select(*).
    RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
    // Operator allPath = op;
    SelectDesc sDesc = new SelectDesc(colList, colNames, false);
    sDesc.setSelStarNoCompute(true);
    Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(sDesc, new RowSchema(allPathRR.getColumnInfos()), lvForward), allPathRR);
    allPath.setColumnExprMap(selColExprMap);
    int allColumns = allPathRR.getColumnInfos().size();
    // Get the UDTF Path
    QB blankQb = new QB(null, null, false);
    Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree.getChild(0), blankQb, lvForward, null, lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
    // add udtf aliases to QB
    for (String udtfAlias : blankQb.getAliases()) {
        qb.addAlias(udtfAlias);
    }
    RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();
    // Merge the two into the lateral view join
    // The cols of the merged result will be the combination of both the
    // cols of the UDTF path and the cols of the all path. The internal
    // names have to be changed to avoid conflicts
    RowResolver lateralViewRR = new RowResolver();
    List<String> outputInternalColNames = new ArrayList<String>();
    // For PPD, we need a column to expression map so that during the walk,
    // the processor knows how to transform the internal col names.
    // The following steps depend on LVmergeRowResolvers being called
    // in the order above.
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    LVmergeRowResolvers(allPathRR, lateralViewRR, colExprMap, outputInternalColNames);
    LVmergeRowResolvers(udtfPathRR, lateralViewRR, colExprMap, outputInternalColNames);
    Operator lateralViewJoin = putOpInsertMap(OperatorFactory.getAndMakeChild(new LateralViewJoinDesc(allColumns, outputInternalColNames), new RowSchema(lateralViewRR.getColumnInfos()), allPath, udtfPath), lateralViewRR);
    lateralViewJoin.setColumnExprMap(colExprMap);
    return lateralViewJoin;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) LateralViewJoinDesc(org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) LateralViewForwardDesc(org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
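
The interesting detail in the "all path" above is that the SelectDesc is created with selectStar = false and then flagged with setSelStarNoCompute(true), so the select operator can forward rows unchanged rather than evaluating each column expression. A brief sketch of just that toggle, reusing the colList/colNames shape from the previous sketch (names are illustrative):

SelectDesc allPathDesc = new SelectDesc(colList, colNames, false);
// Pass rows through without recomputing the projection at run time.
allPathDesc.setSelStarNoCompute(true);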

Example 18 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From the class SemanticAnalyzer, the method genMaterializedViewDataOrgPlan:

private Operator genMaterializedViewDataOrgPlan(List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos, RowResolver inputRR, Operator input) {
    // In this case, we introduce an RS followed immediately by a SEL that restores
    // the row schema to what follow-up operations expect
    Set<String> keys = sortColInfos.stream().map(ColumnInfo::getInternalName).collect(Collectors.toSet());
    Set<String> distributeKeys = distributeColInfos.stream().map(ColumnInfo::getInternalName).collect(Collectors.toSet());
    List<ExprNodeDesc> keyCols = new ArrayList<>();
    List<String> keyColNames = new ArrayList<>();
    StringBuilder order = new StringBuilder();
    StringBuilder nullOrder = new StringBuilder();
    List<ExprNodeDesc> valCols = new ArrayList<>();
    List<String> valColNames = new ArrayList<>();
    List<ExprNodeDesc> partCols = new ArrayList<>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
    Map<String, String> nameMapping = new HashMap<>();
    // map _col0 to KEY._col0, etc
    for (ColumnInfo ci : inputRR.getRowSchema().getSignature()) {
        ExprNodeColumnDesc e = new ExprNodeColumnDesc(ci);
        String columnName = ci.getInternalName();
        if (keys.contains(columnName)) {
            // key (sort column)
            keyColNames.add(columnName);
            keyCols.add(e);
            colExprMap.put(Utilities.ReduceField.KEY + "." + columnName, e);
            nameMapping.put(columnName, Utilities.ReduceField.KEY + "." + columnName);
            order.append("+");
            nullOrder.append("a");
        } else {
            // value
            valColNames.add(columnName);
            valCols.add(e);
            colExprMap.put(Utilities.ReduceField.VALUE + "." + columnName, e);
            nameMapping.put(columnName, Utilities.ReduceField.VALUE + "." + columnName);
        }
        if (distributeKeys.contains(columnName)) {
            // distribute column
            partCols.add(e.clone());
        }
    }
    // Create Key/Value TableDesc. When the operator plan is split into MR tasks,
    // the reduce operator will initialize Extract operator with information
    // from Key and Value TableDesc
    List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(keyCols, keyColNames, 0, "");
    TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, order.toString(), nullOrder.toString());
    List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(valCols, valColNames, 0, "");
    TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
    List<List<Integer>> distinctColumnIndices = new ArrayList<>();
    // Number of reducers is set to default (-1)
    ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols, keyColNames, distinctColumnIndices, valColNames, -1, partCols, -1, keyTable, valueTable, Operation.NOT_ACID);
    RowResolver rsRR = new RowResolver();
    List<ColumnInfo> rsSignature = new ArrayList<>();
    for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
        ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        colInfo.setInternalName(nameMapping.get(colInfo.getInternalName()));
        rsSignature.add(colInfo);
        rsRR.put(nm[0], nm[1], colInfo);
        if (nm2 != null) {
            rsRR.addMappingOnly(nm2[0], nm2[1], colInfo);
        }
    }
    Operator<?> result = putOpInsertMap(OperatorFactory.getAndMakeChild(rsConf, new RowSchema(rsSignature), input), rsRR);
    result.setColumnExprMap(colExprMap);
    // Create SEL operator
    RowResolver selRR = new RowResolver();
    List<ColumnInfo> selSignature = new ArrayList<>();
    List<ExprNodeDesc> columnExprs = new ArrayList<>();
    List<String> colNames = new ArrayList<>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<>();
    for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
        ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        selSignature.add(colInfo);
        selRR.put(nm[0], nm[1], colInfo);
        if (nm2 != null) {
            selRR.addMappingOnly(nm2[0], nm2[1], colInfo);
        }
        String colName = colInfo.getInternalName();
        ExprNodeDesc exprNodeDesc;
        if (keys.contains(colName)) {
            exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.KEY.toString() + "." + colName, null, false);
            columnExprs.add(exprNodeDesc);
        } else {
            exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.VALUE.toString() + "." + colName, null, false);
            columnExprs.add(exprNodeDesc);
        }
        colNames.add(colName);
        selColExprMap.put(colName, exprNodeDesc);
    }
    SelectDesc selConf = new SelectDesc(columnExprs, colNames);
    result = putOpInsertMap(OperatorFactory.getAndMakeChild(selConf, new RowSchema(selSignature), result), selRR);
    result.setColumnExprMap(selColExprMap);
    return result;
}
Also used : LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) PreInsertTableDesc(org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
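
Example 18 shows the ReduceSink-then-Select restore: after the shuffle, each column lives under a KEY. or VALUE. prefix, and the SEL rebuilds the original schema by referencing those prefixed names through the two-argument SelectDesc constructor. A minimal sketch of that projection for two hypothetical string columns, one sorted and one not (column names and types are invented; the constructors and ReduceField prefixes are the same ones used above):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RestoreSelectSketch {
    public static void main(String[] args) {
        List<ExprNodeDesc> columnExprs = new ArrayList<>();
        // "_col0" was a sort key, so it is read back from the KEY side of the shuffle.
        columnExprs.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
                Utilities.ReduceField.KEY + "._col0", null, false));
        // "_col1" was a value column, so it is read back from the VALUE side.
        columnExprs.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
                Utilities.ReduceField.VALUE + "._col1", null, false));
        // Two-argument constructor: a plain projection, no SELECT * semantics.
        SelectDesc selConf = new SelectDesc(columnExprs, Arrays.asList("_col0", "_col1"));
        System.out.println(selConf.getClass().getSimpleName() + " over " + columnExprs.size() + " columns");
    }
}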

Example 19 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From the class TestExecDriver, the method populateMapRedPlan5:

@SuppressWarnings("unchecked")
private void populateMapRedPlan5(Table src) throws SemanticException {
    // map-side work
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    Operator<ReduceSinkDesc> op0 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("0")), Utilities.makeList(getStringColumn("0"), getStringColumn("1")), outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
    Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(Utilities.makeList(getStringColumn("key"), getStringColumn("value")), outputColumns), op0);
    addMapWork(mr, src, "a", op4);
    ReduceWork rWork = new ReduceWork();
    mr.setReduceWork(rWork);
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op0.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());
    // reduce-side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(TMPDIR + File.separator + "mapredplan5.out"), Utilities.defaultTd, false));
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
    rWork.setReducer(op2);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
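
Note the reduce-side naming in this test plan: key columns surface as KEY.reducesinkkeyN, while value columns keep their _colN names under the VALUE prefix. That is why the reducer's SelectDesc rebuilds its two output columns from KEY.reducesinkkey0 and VALUE._col1.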

Example 20 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

From the class TestExecDriver, the method populateMapRedPlan2:

@SuppressWarnings("unchecked")
private void populateMapRedPlan2(Table src) throws Exception {
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    // map-side work
    Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), Utilities.makeList(getStringColumn("key"), getStringColumn("value")), outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
    addMapWork(mr, src, "a", op1);
    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    mr.setReduceWork(rWork);
    // reduce-side work
    Operator<FileSinkDesc> op4 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(TMPDIR + File.separator + "mapredplan2.out"), Utilities.defaultTd, false));
    Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
    rWork.setReducer(op2);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)

Aggregations

SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 55
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 50
ArrayList (java.util.ArrayList): 43
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 32
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 31
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 30
HashMap (java.util.HashMap): 28
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 25
LinkedHashMap (java.util.LinkedHashMap): 20
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 16
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 15
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 13
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 13
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 13
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 13
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 13
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 13
ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 13
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 12