Search in sources :

Example 1 with LateralViewJoinDesc

use of org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc in project hive by apache.

the class SemanticAnalyzer method genLateralViewPlan.

private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) throws SemanticException {
    RowResolver lvForwardRR = new RowResolver();
    RowResolver source = opParseCtx.get(op).getRowResolver();
    Map<String, ExprNodeDesc> lvfColExprMap = new HashMap<String, ExprNodeDesc>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    List<String> colNames = new ArrayList<String>();
    for (ColumnInfo col : source.getColumnInfos()) {
        String[] tabCol = source.reverseLookup(col.getInternalName());
        lvForwardRR.put(tabCol[0], tabCol[1], col);
        ExprNodeDesc colExpr = new ExprNodeColumnDesc(col);
        colList.add(colExpr);
        colNames.add(colExpr.getName());
        lvfColExprMap.put(col.getInternalName(), colExpr);
        selColExprMap.put(col.getInternalName(), colExpr.clone());
    }
    Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()), op), lvForwardRR);
    lvForward.setColumnExprMap(lvfColExprMap);
    // The order in which the two paths are added is important. The
    // lateral view join operator depends on having the select operator
    // give it the row first.
    // Get the all path by making a select(*).
    RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
    // Operator allPath = op;
    SelectDesc sDesc = new SelectDesc(colList, colNames, false);
    sDesc.setSelStarNoCompute(true);
    Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(sDesc, new RowSchema(allPathRR.getColumnInfos()), lvForward), allPathRR);
    allPath.setColumnExprMap(selColExprMap);
    int allColumns = allPathRR.getColumnInfos().size();
    // Get the UDTF Path
    QB blankQb = new QB(null, null, false);
    Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree.getChild(0), blankQb, lvForward, null, lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
    // add udtf aliases to QB
    for (String udtfAlias : blankQb.getAliases()) {
        qb.addAlias(udtfAlias);
    }
    RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();
    // Merge the two into the lateral view join
    // The cols of the merged result will be the combination of both the
    // cols of the UDTF path and the cols of the all path. The internal
    // names have to be changed to avoid conflicts
    RowResolver lateralViewRR = new RowResolver();
    ArrayList<String> outputInternalColNames = new ArrayList<String>();
    // For PPD, we need a column to expression map so that during the walk,
    // the processor knows how to transform the internal col names.
    // Following steps are dependant on the fact that we called
    // LVmerge.. in the above order
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    LVmergeRowResolvers(allPathRR, lateralViewRR, colExprMap, outputInternalColNames);
    LVmergeRowResolvers(udtfPathRR, lateralViewRR, colExprMap, outputInternalColNames);
    Operator lateralViewJoin = putOpInsertMap(OperatorFactory.getAndMakeChild(new LateralViewJoinDesc(allColumns, outputInternalColNames), new RowSchema(lateralViewRR.getColumnInfos()), allPath, udtfPath), lateralViewRR);
    lateralViewJoin.setColumnExprMap(colExprMap);
    return lateralViewJoin;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) LateralViewJoinDesc(org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) LateralViewForwardDesc(org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)1 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)1 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)1 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)1 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)1 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)1 Operator (org.apache.hadoop.hive.ql.exec.Operator)1 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)1 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)1 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)1 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)1 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)1 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)1 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 LateralViewForwardDesc (org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc)1 LateralViewJoinDesc (org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc)1