Example 6 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveOpConverter method genReduceSink.

@SuppressWarnings({ "rawtypes", "unchecked" })
private static ReduceSinkOperator genReduceSink(Operator<?> input, String tableAlias, ExprNodeDesc[] keys, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf) throws SemanticException {
    // dummy for backtracking
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(input));
    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
    // Compute join keys and store in reduceKeys
    for (ExprNodeDesc key : keys) {
        reduceKeys.add(key);
        reduceKeysBack.add(ExprNodeDescUtils.backtrack(key, dummy, input));
    }
    // Walk over the input schema and copy in the output
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ColumnInfo> inputColumns = input.getSchema().getSignature();
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    List<String> outputColumnNames = new ArrayList<String>();
    int[] index = new int[inputColumns.size()];
    for (int i = 0; i < inputColumns.size(); i++) {
        ColumnInfo colInfo = inputColumns.get(i);
        String outputColName = colInfo.getInternalName();
        ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when input is script operator
        ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, input);
        int kindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
        if (kindex >= 0) {
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setAlias(outputColName);
            newColInfo.setTabAlias(tableAlias);
            outputColumns.add(newColInfo);
            index[i] = kindex;
            continue;
        }
        int vindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
        if (vindex >= 0) {
            index[i] = -vindex - 1;
            continue;
        }
        index[i] = -reduceValues.size() - 1;
        reduceValues.add(expr);
        reduceValuesBack.add(exprBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setAlias(outputColName);
        newColInfo.setTabAlias(tableAlias);
        outputColumns.add(newColInfo);
        outputColumnNames.add(outputColName);
    }
    dummy.setParentOperators(null);
    // Use only 1 reducer if no reduce keys
    if (reduceKeys.size() == 0) {
        numReducers = 1;
        // Cartesian product is not supported in strict mode
        String error = StrictChecks.checkCartesian(hiveConf);
        if (error != null) {
            throw new SemanticException(error);
        }
    }
    ReduceSinkDesc rsDesc;
    if (order.isEmpty()) {
        rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, reduceKeys.size(), numReducers, acidOperation);
    } else {
        rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, partitionCols, order, nullOrder, numReducers, acidOperation);
    }
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputColumns), input);
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
    }
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    for (int i = 0; i < valColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
    }
    rsOp.setValueIndex(index);
    rsOp.setColumnExprMap(colExprMap);
    rsOp.setInputAliases(input.getSchema().getTableNames().toArray(new String[input.getSchema().getTableNames().size()]));
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + rsOp + " with row schema: [" + rsOp.getSchema() + "]");
    }
    return rsOp;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
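
Note how the method packs two different mappings into the single index array: a non-negative entry means the input column became reduce key number index[i], while a value column stored at slot v is recorded as -v - 1, so the two ranges can never collide. A minimal standalone sketch of that convention (plain Java with hypothetical data, not the Hive API):

// Sketch of the ReduceSink value-index sign encoding used in
// genReduceSink above. Layout here is hypothetical: columns 0 and 3
// are keys 0 and 1, columns 1 and 2 are values 0 and 1.
public class ValueIndexDemo {

    public static void main(String[] args) {
        int[] index = { 0, -1, -2, 1 };
        for (int i = 0; i < index.length; i++) {
            if (index[i] >= 0) {
                // Non-negative entries are positions among the reduce keys.
                System.out.println("col " + i + " -> KEY.reducesinkkey" + index[i]);
            } else {
                // Invert -v - 1 to recover the value slot.
                System.out.println("col " + i + " -> VALUE slot " + (-index[i] - 1));
            }
        }
    }
}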

Example 7 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveOpConverter method visit.

/**
   * Translates a Calcite {@link HiveTableScan} into a Hive {@link TableScanOperator}.
   *
   * TODO: 1. PPD needs to get pushed into TS
   *
   * @param scanRel the Calcite table scan to translate
   * @return an OpAttr carrying the table alias, the virtual column positions, and the new operator
   */
OpAttr visit(HiveTableScan scanRel) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName() + " with row type: [" + scanRel.getRowType() + "]");
    }
    RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable();
    // 1. Setup TableScan Desc
    // 1.1 Build col details used by scan
    ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
    List<VirtualColumn> virtualCols = new ArrayList<VirtualColumn>();
    List<Integer> neededColumnIDs = new ArrayList<Integer>();
    List<String> neededColumnNames = new ArrayList<String>();
    Set<Integer> vcolsInCalcite = new HashSet<Integer>();
    List<String> partColNames = new ArrayList<String>();
    Map<Integer, VirtualColumn> VColsMap = HiveCalciteUtil.getVColsMap(ht.getVirtualCols(), ht.getNoOfNonVirtualCols());
    Map<Integer, ColumnInfo> posToPartColInfo = ht.getPartColInfoMap();
    Map<Integer, ColumnInfo> posToNonPartColInfo = ht.getNonPartColInfoMap();
    List<Integer> neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT();
    List<String> scanColNames = scanRel.getRowType().getFieldNames();
    String tableAlias = scanRel.getConcatQbIDAlias();
    String colName;
    ColumnInfo colInfo;
    VirtualColumn vc;
    for (int index = 0; index < scanRel.getRowType().getFieldList().size(); index++) {
        colName = scanColNames.get(index);
        if (VColsMap.containsKey(index)) {
            vc = VColsMap.get(index);
            virtualCols.add(vc);
            colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden());
            vcolsInCalcite.add(index);
        } else if (posToPartColInfo.containsKey(index)) {
            partColNames.add(colName);
            colInfo = posToPartColInfo.get(index);
            vcolsInCalcite.add(index);
        } else {
            colInfo = posToNonPartColInfo.get(index);
        }
        colInfos.add(colInfo);
        if (neededColIndxsFrmReloptHT.contains(index)) {
            neededColumnIDs.add(index);
            neededColumnNames.add(colName);
        }
    }
    // 1.2 Create TableScanDesc
    TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD());
    // 1.3. Set Partition cols in TSDesc
    tsd.setPartColumns(partColNames);
    // 1.4. Set needed cols in TSDesc
    tsd.setNeededColumnIDs(neededColumnIDs);
    tsd.setNeededColumns(neededColumnNames);
    // 2. Setup TableScan
    TableScanOperator ts = (TableScanOperator) OperatorFactory.get(semanticAnalyzer.getOpContext(), tsd, new RowSchema(colInfos));
    // There may already be more than one tablescan with the same alias
    // (e.g. when Calcite has processed subqueries), so disambiguate the alias.
    if (topOps.get(tableAlias) != null) {
        tableAlias = tableAlias + this.uniqueCounter;
    }
    topOps.put(tableAlias, ts);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]");
    }
    return new OpAttr(tableAlias, vcolsInCalcite, ts);
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) HashSet(java.util.HashSet)
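
Each field position in the scan's row type falls into exactly one of three buckets: virtual column, partition column, or regular column; both virtual and partition positions are additionally recorded in vcolsInCalcite. A stripped-down model of that dispatch (hypothetical names and data, no Hive dependencies):

import java.util.Set;

// Simplified, self-contained model of the per-index classification in the
// loop above; the sets stand in for VColsMap / posToPartColInfo lookups.
public class ScanColumnClassifier {

    enum Kind { VIRTUAL, PARTITION, REGULAR }

    static Kind classify(int index, Set<Integer> virtualPos, Set<Integer> partitionPos) {
        if (virtualPos.contains(index)) {
            return Kind.VIRTUAL;    // mirrors the VColsMap.containsKey branch
        } else if (partitionPos.contains(index)) {
            return Kind.PARTITION;  // mirrors the posToPartColInfo branch
        }
        return Kind.REGULAR;        // everything else is a non-partition column
    }

    public static void main(String[] args) {
        Set<Integer> virtualPos = Set.of(4);   // e.g. a hidden virtual column at the end
        Set<Integer> partitionPos = Set.of(3); // e.g. a date partition column
        for (int i = 0; i < 5; i++) {
            System.out.println("col " + i + " -> " + classify(i, virtualPos, partitionPos));
        }
    }
}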

Example 8 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveOpConverter method createColInfos.

private static Pair<ArrayList<ColumnInfo>, Set<Integer>> createColInfos(List<RexNode> calciteExprs, List<ExprNodeDesc> hiveExprs, List<String> projNames, OpAttr inpOpAf) {
    if (hiveExprs.size() != projNames.size()) {
        throw new RuntimeException("Column expressions list doesn't match Column Names list");
    }
    RexNode rexN;
    ExprNodeDesc pe;
    ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
    boolean vc;
    Set<Integer> newVColSet = new HashSet<Integer>();
    for (int i = 0; i < hiveExprs.size(); i++) {
        pe = hiveExprs.get(i);
        rexN = calciteExprs.get(i);
        vc = false;
        if (rexN instanceof RexInputRef) {
            if (inpOpAf.vcolsInCalcite.contains(((RexInputRef) rexN).getIndex())) {
                newVColSet.add(i);
                vc = true;
            }
        }
        colInfos.add(new ColumnInfo(projNames.get(i), pe.getTypeInfo(), inpOpAf.tabAlias, vc));
    }
    return new Pair<ArrayList<ColumnInfo>, Set<Integer>>(colInfos, newVColSet);
}
Also used : ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RexInputRef(org.apache.calcite.rex.RexInputRef) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RexNode(org.apache.calcite.rex.RexNode) HashSet(java.util.HashSet) Pair(org.apache.calcite.util.Pair)
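
A detail worth noting: the virtual-column test is made against the input position referenced by the RexInputRef, but what lands in newVColSet is the output position i, so virtual-column markers follow the projection. A small illustration (plain Java, hypothetical data):

import java.util.HashSet;
import java.util.Set;

// Illustrates the remapping performed by createColInfos above: membership
// is tested on the input position, but the output index i is recorded.
public class VColRemapDemo {

    public static void main(String[] args) {
        Set<Integer> vcolsInInput = Set.of(2);  // input column 2 is virtual
        int[] projection = { 2, 0 };            // output i reads input projection[i]

        Set<Integer> vcolsInOutput = new HashSet<>();
        for (int i = 0; i < projection.length; i++) {
            if (vcolsInInput.contains(projection[i])) {
                vcolsInOutput.add(i);           // mirrors newVColSet.add(i)
            }
        }
        System.out.println(vcolsInOutput);      // prints [0]
    }
}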

Example 9 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveOpConverter method createColInfosSubset.

private static ArrayList<ColumnInfo> createColInfosSubset(Operator<?> input, List<String> keepColNames) {
    ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>();
    int pos = 0;
    for (ColumnInfo ci : input.getSchema().getSignature()) {
        if (pos < keepColNames.size() && ci.getInternalName().equals(keepColNames.get(pos))) {
            cInfoLst.add(new ColumnInfo(ci));
            pos++;
        }
    }
    return cInfoLst;
}
Also used : ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo)
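
Because pos only advances on a match, keepColNames is treated as an ordered subsequence of the input schema: a kept name that appears out of order is silently skipped. A quick standalone check of that behavior (plain Java, hypothetical column names):

import java.util.ArrayList;
import java.util.List;

// Same subsequence-matching logic as createColInfosSubset, on plain strings.
public class SubsetOrderDemo {

    static List<String> keepInOrder(List<String> schema, List<String> keep) {
        List<String> kept = new ArrayList<>();
        int pos = 0;
        for (String name : schema) {
            if (pos < keep.size() && name.equals(keep.get(pos))) {
                kept.add(name);
                pos++;
            }
        }
        return kept;
    }

    public static void main(String[] args) {
        List<String> schema = List.of("_col0", "_col1", "_col2");
        System.out.println(keepInOrder(schema, List.of("_col0", "_col2"))); // [_col0, _col2]
        System.out.println(keepInOrder(schema, List.of("_col2", "_col0"))); // [_col2] -- _col0 is skipped
    }
}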

Example 10 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class ExprProcFactory method getExprString.

/**
   * Get the expression string of an expression node.
   */
public static String getExprString(RowSchema rs, ExprNodeDesc expr, LineageCtx lctx, Operator<? extends OperatorDesc> inpOp, Predicate cond) {
    if (expr instanceof ExprNodeColumnDesc) {
        ExprNodeColumnDesc col = (ExprNodeColumnDesc) expr;
        String internalName = col.getColumn();
        String alias = internalName;
        String tabAlias = col.getTabAlias();
        ColumnInfo ci = rs.getColumnInfo(internalName);
        if (ci != null) {
            if (ci.getAlias() != null) {
                alias = ci.getAlias();
            }
            if (ci.getTabAlias() != null) {
                tabAlias = ci.getTabAlias();
            }
        }
        Dependency dep = lctx.getIndex().getDependency(inpOp, internalName);
        if ((tabAlias == null || tabAlias.startsWith("_") || tabAlias.startsWith("$")) && (dep != null && dep.getType() == DependencyType.SIMPLE)) {
            Set<BaseColumnInfo> baseCols = dep.getBaseCols();
            if (baseCols != null && !baseCols.isEmpty()) {
                BaseColumnInfo baseCol = baseCols.iterator().next();
                tabAlias = baseCol.getTabAlias().getAlias();
                alias = baseCol.getColumn().getName();
            }
        }
        if (tabAlias != null && tabAlias.length() > 0 && !tabAlias.startsWith("_") && !tabAlias.startsWith("$")) {
            if (cond != null && !findSourceColumn(lctx, cond, tabAlias, alias) && dep != null) {
                cond.getBaseCols().addAll(dep.getBaseCols());
            }
            return tabAlias + "." + alias;
        }
        if (dep != null) {
            if (cond != null) {
                cond.getBaseCols().addAll(dep.getBaseCols());
            }
            if (dep.getExpr() != null) {
                return dep.getExpr();
            }
        }
        if (alias.startsWith("_")) {
            ci = inpOp.getSchema().getColumnInfo(internalName);
            if (ci != null && ci.getAlias() != null) {
                alias = ci.getAlias();
            }
        }
        return alias;
    } else if (expr instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) expr;
        List<ExprNodeDesc> children = func.getChildren();
        String[] childrenExprStrings = new String[children.size()];
        for (int i = 0; i < childrenExprStrings.length; i++) {
            childrenExprStrings[i] = getExprString(rs, children.get(i), lctx, inpOp, cond);
        }
        return func.getGenericUDF().getDisplayString(childrenExprStrings);
    }
    return expr.getExprString();
}
Also used : ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) BaseColumnInfo(org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ArrayList(java.util.ArrayList) List(java.util.List) Dependency(org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency) BaseColumnInfo(org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo)
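
For function expressions the method recurses into every child, collects the child strings, and delegates the final rendering to the UDF's getDisplayString; column leaves resolve their alias through the row schema or the lineage index first. A stripped-down model of that recursion (hypothetical classes, no Hive dependencies):

import java.util.List;

// Minimal model of the recursion in the ExprNodeGenericFuncDesc branch:
// leaves render themselves, function nodes render all children first and
// then combine the pieces, as getDisplayString does.
abstract class Expr {
    abstract String display();
}

class LeafExpr extends Expr {
    final String text;
    LeafExpr(String text) { this.text = text; }
    @Override String display() { return text; }
}

class FuncExpr extends Expr {
    final String op;
    final List<Expr> children;
    FuncExpr(String op, List<Expr> children) { this.op = op; this.children = children; }
    @Override String display() {
        // Binary-operator rendering, standing in for getGenericUDF().getDisplayString(...)
        return "(" + children.get(0).display() + " " + op + " " + children.get(1).display() + ")";
    }
}

public class ExprStringDemo {
    public static void main(String[] args) {
        Expr e = new FuncExpr(">", List.of(
                new FuncExpr("+", List.of(new LeafExpr("t.a"), new LeafExpr("t.b"))),
                new LeafExpr("10")));
        System.out.println(e.display()); // ((t.a + t.b) > 10)
    }
}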

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 117 usages
ArrayList (java.util.ArrayList): 75
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 69
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 56
HashMap (java.util.HashMap): 46
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 43
LinkedHashMap (java.util.LinkedHashMap): 35
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 34
Operator (org.apache.hadoop.hive.ql.exec.Operator): 28
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 27
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 27
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 26
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 25
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 24
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 21
Map (java.util.Map): 20
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException): 20
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 19
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 19
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 18