Example 6 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From the class HiveOpConverter, the method genReduceSinkAndBacktrackSelect:

private static SelectOperator genReduceSinkAndBacktrackSelect(Operator<?> input, ExprNodeDesc[] keys, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf, List<String> keepColNames) throws SemanticException {
    // 1. Generate RS operator
    // 1.1 Prune the tableNames, only count the tableNames that are not empty strings
    // as empty string in table aliases is only allowed for virtual columns.
    String tableAlias = null;
    Set<String> tableNames = input.getSchema().getTableNames();
    for (String tableName : tableNames) {
        if (tableName != null) {
            if (tableName.length() == 0) {
                // Empty alias (virtual columns): keep it only as a fallback
                if (tableAlias == null) {
                    tableAlias = tableName;
                }
            } else {
                if (tableAlias == null || tableAlias.length() == 0) {
                    // First non-empty alias wins over null or the empty fallback
                    tableAlias = tableName;
                } else {
                    if (!tableName.equals(tableAlias)) {
                        throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is more than one");
                    }
                }
            }
        }
    }
    if (tableAlias == null) {
        throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
    }
    // 1.2 Now generate RS operator
    ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, nullOrder, numReducers, acidOperation, hiveConf);
    // 2. Generate backtrack Select operator
    Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(keepColNames, rsOp.getConf().getOutputKeyColumnNames(), rsOp.getConf().getOutputValueColumnNames(), rsOp.getValueIndex(), input);
    SelectDesc selectDesc = new SelectDesc(new ArrayList<ExprNodeDesc>(descriptors.values()), new ArrayList<String>(descriptors.keySet()));
    ArrayList<ColumnInfo> cinfoLst = createColInfosSubset(input, keepColNames);
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, new RowSchema(cinfoLst), rsOp);
    selectOp.setColumnExprMap(descriptors);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + selectOp + " with row schema: [" + selectOp.getSchema() + "]");
    }
    return selectOp;
}
Also used: RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
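
The alias-resolution loop above is the subtlest part of this method. Below is a minimal standalone sketch of that step, assuming plain strings stand in for the result of input.getSchema().getTableNames(); the class and method names here are invented for illustration and are not part of Hive.

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

public class AliasResolutionSketch {
    static String resolveSingleAlias(Set<String> tableNames) {
        String tableAlias = null;
        for (String tableName : tableNames) {
            if (tableName == null) {
                continue;                     // ignore missing entries
            }
            if (tableName.isEmpty()) {
                // empty alias (virtual columns): keep only as a fallback
                if (tableAlias == null) {
                    tableAlias = tableName;
                }
            } else if (tableAlias == null || tableAlias.isEmpty()) {
                tableAlias = tableName;       // first non-empty alias wins
            } else if (!tableName.equals(tableAlias)) {
                throw new IllegalStateException("expecting only one tableAlias but there is more than one");
            }
        }
        if (tableAlias == null) {
            throw new IllegalStateException("expecting one tableAlias but there is none");
        }
        return tableAlias;
    }

    public static void main(String[] args) {
        Set<String> names = new LinkedHashSet<>(Arrays.asList("", "t1"));
        System.out.println(resolveSingleAlias(names));   // prints t1
    }
}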

Example 7 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From the class TableAccessAnalyzer, the method genColNameMap:

/*
   * This method takes in an input operator and a subset of its output
   * column names, and generates the input column names for the operator
   * corresponding to those outputs. If the mapping from the input column
   * name to the output column name is not simple, the method returns
   * false, else it returns true. The list of output column names is
   * modified by this method to be the list of corresponding input column
   * names.
   */
private static boolean genColNameMap(Operator<? extends OperatorDesc> op, List<String> currColNames) {
    List<ExprNodeDesc> colList = null;
    List<String> outputColNames = null;
    assert (op.columnNamesRowResolvedCanBeObtained());
    // column names
    if (op instanceof SelectOperator) {
        SelectDesc selectDesc = ((SelectOperator) op).getConf();
        if (!selectDesc.isSelStarNoCompute()) {
            colList = selectDesc.getColList();
            outputColNames = selectDesc.getOutputColumnNames();
            // Only columns and constants can be selected
            for (int pos = 0; pos < colList.size(); pos++) {
                ExprNodeDesc colExpr = colList.get(pos);
                String outputColName = outputColNames.get(pos);
                // If it is not a column we need for the keys, move on
                if (!currColNames.contains(outputColName)) {
                    continue;
                }
                if (colExpr instanceof ExprNodeConstantDesc) {
                    currColNames.remove(outputColName);
                    continue;
                } else if (colExpr instanceof ExprNodeColumnDesc) {
                    String inputColName = ((ExprNodeColumnDesc) colExpr).getColumn();
                    if (!outputColName.equals(inputColName)) {
                        currColNames.set(currColNames.indexOf(outputColName), inputColName);
                    }
                } else {
                    // the column map can not be generated
                    return false;
                }
            }
        }
    }
    return true;
}
Also used: ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)
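
The renaming logic can be illustrated without Hive's operator classes. The sketch below is a hypothetical simplification in which a Map<String, String> stands in for the SelectDesc, mapping each output name to its source column, or to null when the expression is not a simple column reference; all names are invented.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ColNameMapSketch {
    // Rewrite each needed output name in currColNames to its input name.
    // Returns false when some needed column has no simple column source.
    static boolean mapToInputNames(Map<String, String> outputToInput,
                                   List<String> currColNames) {
        for (Map.Entry<String, String> e : outputToInput.entrySet()) {
            int idx = currColNames.indexOf(e.getKey());
            if (idx < 0) {
                continue;                 // column not needed for the keys
            }
            if (e.getValue() == null) {
                return false;             // complex expression: no simple map
            }
            currColNames.set(idx, e.getValue());
        }
        return true;
    }

    public static void main(String[] args) {
        Map<String, String> sel = new LinkedHashMap<>();
        sel.put("_col0", "key");
        sel.put("_col1", "value");
        List<String> cols = new ArrayList<>(Arrays.asList("_col0"));
        System.out.println(mapToInputNames(sel, cols) + " " + cols);  // prints: true [key]
    }
}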

Example 8 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From the class ColumnPrunerProcCtx, the method handleFilterUnionChildren:

/**
   * If the input filter operator has direct child(ren) that are union
   * operators, and the filter's pruned column list is not the same as the
   * union's, create a select operator between them. The select operator has
   * the same number of columns as the pruned child operator.
   *
   * @param curOp
   *          the filter operator whose children need to be handled
   * @throws SemanticException
   */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp) throws SemanticException {
    if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
        return;
    }
    List<FieldNode> parentPrunList = prunedColLists.get(curOp);
    if (parentPrunList == null || parentPrunList.size() == 0) {
        return;
    }
    List<FieldNode> prunList = null;
    for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
        if (child instanceof UnionOperator) {
            prunList = genColLists(child);
            if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
                continue;
            }
            ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
            ArrayList<String> outputColNames = new ArrayList<String>();
            Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
            ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
            for (ColumnInfo colInfo : child.getSchema().getSignature()) {
                if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
                    continue;
                }
                ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
                exprs.add(colDesc);
                outputColNames.add(colInfo.getInternalName());
                ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
                newCol.setAlias(colInfo.getAlias());
                outputRS.add(newCol);
                colExprMap.put(colInfo.getInternalName(), colDesc);
            }
            SelectDesc select = new SelectDesc(exprs, outputColNames, false);
            curOp.removeChild(child);
            SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), curOp);
            OperatorFactory.makeChild(sel, child);
            sel.setColumnExprMap(colExprMap);
        }
    }
}
Also used: RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)
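
The rewiring pattern at the end of this method — removeChild, getAndMakeChild, then makeChild — amounts to splicing a new node between a parent and one of its children. Below is a generic sketch of that step over a toy Node class; the class is hypothetical and only mirrors the parent-to-child links, not Hive's Operator API.

import java.util.ArrayList;
import java.util.List;

public class InsertBetweenSketch {
    static class Node {
        final String name;
        final List<Node> children = new ArrayList<>();
        Node(String name) { this.name = name; }
    }

    // Detach child from parent and splice a new node between them.
    static Node insertBetween(Node parent, Node child, String name) {
        parent.children.remove(child);
        Node mid = new Node(name);
        parent.children.add(mid);
        mid.children.add(child);
        return mid;
    }

    public static void main(String[] args) {
        Node fil = new Node("FIL"), uni = new Node("UNION");
        fil.children.add(uni);
        insertBetween(fil, uni, "SEL");
        System.out.println(fil.children.get(0).name);   // prints SEL
    }
}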

Example 9 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From the class ConvertJoinMapJoin, the method removeCycleCreatingSemiJoinOps:

// Remove any semijoin branch associated with the hashjoin parent's operator
// pipeline that could create a cycle after the hashjoin optimization.
private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, Operator<?> parentSelectOpOfBigTable, ParseContext parseContext) throws SemanticException {
    Map<ReduceSinkOperator, TableScanOperator> semiJoinMap = new HashMap<ReduceSinkOperator, TableScanOperator>();
    for (Operator<?> op : parentSelectOpOfBigTable.getChildOperators()) {
        if (!(op instanceof SelectOperator)) {
            continue;
        }
        while (op.getChildOperators().size() > 0) {
            op = op.getChildOperators().get(0);
        }
        // If not ReduceSink Op, skip
        if (!(op instanceof ReduceSinkOperator)) {
            continue;
        }
        ReduceSinkOperator rs = (ReduceSinkOperator) op;
        TableScanOperator ts = parseContext.getRsOpToTsOpMap().get(rs);
        if (ts == null) {
            // skip, no semijoin branch
            continue;
        }
        // Found a semijoin branch.
        for (Operator<?> parent : mapjoinOp.getParentOperators()) {
            if (!(parent instanceof ReduceSinkOperator)) {
                continue;
            }
            Set<TableScanOperator> tsOps = OperatorUtils.findOperatorsUpstream(parent, TableScanOperator.class);
            for (TableScanOperator parentTS : tsOps) {
                // If the parent is same as the ts, then we have a cycle.
                if (ts == parentTS) {
                    semiJoinMap.put(rs, ts);
                    break;
                }
            }
        }
    }
    if (semiJoinMap.size() > 0) {
        for (ReduceSinkOperator rs : semiJoinMap.keySet()) {
            GenTezUtils.removeBranch(rs);
            GenTezUtils.removeSemiJoinOperator(parseContext, rs, semiJoinMap.get(rs));
        }
    }
}
Also used: TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), HashMap (java.util.HashMap), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)
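
The cycle test hinges on an upstream search from each ReduceSink parent, which is what OperatorUtils.findOperatorsUpstream does in the example above. The standalone sketch below shows the same idea as a breadth-first walk over parent links; the Node class and method names are invented for illustration.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class UpstreamSearchSketch {
    static class Node {
        final String name;
        final List<Node> parents = new ArrayList<>();
        Node(String name) { this.name = name; }
    }

    // Breadth-first walk over parent links: is target upstream of start?
    static boolean isUpstream(Node start, Node target) {
        Deque<Node> queue = new ArrayDeque<>();
        Set<Node> seen = new HashSet<>();
        queue.add(start);
        seen.add(start);
        while (!queue.isEmpty()) {
            Node n = queue.poll();
            if (n == target) {
                return true;      // target feeds this pipeline: cycle risk
            }
            for (Node p : n.parents) {
                if (seen.add(p)) {
                    queue.add(p);
                }
            }
        }
        return false;
    }

    public static void main(String[] args) {
        Node ts = new Node("TS"), rs = new Node("RS");
        rs.parents.add(ts);
        System.out.println(isUpstream(rs, ts));   // prints true
    }
}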

Example 10 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

From the class BucketingSortingOpProcFactory, the method extractTraits:

static void extractTraits(BucketingSortingCtx bctx, ReduceSinkOperator rop, Operator<?> childop) throws SemanticException {
    List<ExprNodeDesc> outputValues = Collections.emptyList();
    if (childop instanceof SelectOperator) {
        SelectDesc select = ((SelectOperator) childop).getConf();
        outputValues = ExprNodeDescUtils.backtrack(select.getColList(), childop, rop);
    }
    if (outputValues.isEmpty()) {
        return;
    }
    // Go through the set of partition columns, and find their representatives in the values
    // These represent the bucketed columns
    List<BucketCol> bucketCols = extractBucketCols(rop, outputValues);
    // Go through the set of key columns, and find their representatives in the values
    // These represent the sorted columns
    List<SortCol> sortCols = extractSortCols(rop, outputValues);
    List<ColumnInfo> colInfos = childop.getSchema().getSignature();
    if (!bucketCols.isEmpty()) {
        List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
        bctx.setBucketedCols(childop, newBucketCols);
    }
    if (!sortCols.isEmpty()) {
        List<SortCol> newSortCols = getNewSortCols(sortCols, colInfos);
        bctx.setSortedCols(childop, newSortCols);
    }
}
Also used: SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), BucketCol (org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol), SortCol (org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol), BucketSortCol (org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketSortCol), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)
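
Conceptually, extractBucketCols and extractSortCols locate each partition or key column among the backtracked output expressions. The sketch below reduces that matching to plain column names; it is a simplification, since the real code compares ExprNodeDescs rather than strings, and all names here are invented.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class TraitExtractionSketch {
    // For each wanted column, record the index where it appears in the
    // child's output; a column that never appears yields no trait.
    static List<Integer> positionsOf(List<String> wanted, List<String> outputValues) {
        List<Integer> positions = new ArrayList<>();
        for (String col : wanted) {
            int idx = outputValues.indexOf(col);
            if (idx >= 0) {
                positions.add(idx);
            }
        }
        return positions;
    }

    public static void main(String[] args) {
        List<String> partCols = Arrays.asList("key");
        List<String> outputs = Arrays.asList("value", "key");
        System.out.println(positionsOf(partCols, outputs));   // prints [1]
    }
}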

Aggregations

SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 23
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 14
ArrayList (java.util.ArrayList): 10
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 9
SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 8
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 7
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 7
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 7
Test (org.junit.Test): 7
ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc): 6
HashMap (java.util.HashMap): 5
LinkedHashMap (java.util.LinkedHashMap): 5
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 5
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 5
Operator (org.apache.hadoop.hive.ql.exec.Operator): 5
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 5
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 5
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 4
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 4
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 4