Example 86 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache: the class JoinVisitor, method genJoin.

private JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
    // 1. Extract join type
    JoinCondDesc[] joinCondns;
    boolean semiJoin;
    boolean noOuterJoin;
    if (join instanceof HiveMultiJoin) {
        HiveMultiJoin hmj = (HiveMultiJoin) join;
        joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
        for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
            joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
        }
        semiJoin = false;
        noOuterJoin = !hmj.isOuterJoin();
    } else {
        joinCondns = new JoinCondDesc[1];
        JoinRelType joinRelType = JoinRelType.INNER;
        if (join instanceof Join) {
            joinRelType = ((Join) join).getJoinType();
        }
        JoinType joinType;
        switch(joinRelType) {
            case SEMI:
                joinType = JoinType.LEFTSEMI;
                semiJoin = true;
                break;
            case ANTI:
                joinType = JoinType.ANTI;
                semiJoin = true;
                break;
            default:
                assert join instanceof Join;
                joinType = transformJoinType(((Join) join).getJoinType());
                semiJoin = false;
        }
        joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
        noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
    }
    // 2. We create the join aux structures
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
    Operator<?>[] childOps = new Operator[children.size()];
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    int outputPos = 0;
    for (int pos = 0; pos < children.size(); pos++) {
        // 2.1. Backtracking from RS
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        if (inputRS.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = inputRS.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        int[] index = inputRS.getValueIndex();
        Byte tag = (byte) rsDesc.getTag();
        // 2.1.1. If semijoin...
        if (semiJoin && pos != 0) {
            exprMap.put(tag, new ArrayList<ExprNodeDesc>());
            childOps[pos] = inputRS;
            continue;
        }
        posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
        List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo info = new ColumnInfo(parentColumns.get(i));
            info.setInternalName(outputColumnNames.get(outputPos));
            info.setTabAlias(tabAlias);
            outputColumns.add(info);
            reversedExprs.put(outputColumnNames.get(outputPos), tag);
            outputPos++;
        }
        exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
        colExprMap.putAll(descriptors);
        childOps[pos] = inputRS;
    }
    // 3. We populate the filters and filterMap structure needed in the join descriptor
    List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
    int[][] filterMap = new int[children.size()][];
    for (int i = 0; i < children.size(); i++) {
        filtersPerInput.add(new ArrayList<ExprNodeDesc>());
    }
    // 3.1. Assign each filter expression to the input branch it refers to
    for (int i = 0; i < filterExpressions.size(); i++) {
        int leftPos = joinCondns[i].getLeft();
        int rightPos = joinCondns[i].getRight();
        for (ExprNodeDesc expr : filterExpressions.get(i)) {
            // We need to update the exprNode, as currently
            // they refer to columns in the output of the join;
            // they should refer to the columns output by the RS
            int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
            if (inputPos == -1) {
                inputPos = leftPos;
            }
            filtersPerInput.get(inputPos).add(expr);
            if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
                if (inputPos == leftPos) {
                    updateFilterMap(filterMap, leftPos, rightPos);
                } else {
                    updateFilterMap(filterMap, rightPos, leftPos);
                }
            }
        }
    }
    for (int pos = 0; pos < children.size(); pos++) {
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        Byte tag = (byte) rsDesc.getTag();
        filters.put(tag, filtersPerInput.get(pos));
    }
    // 4. We create the join operator with its descriptor
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions, null);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(filterMap);
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    joinOp.getConf().setBaseSrc(baseSrc);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
    }
    return joinOp;
}
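
To isolate what happens to ColumnInfo in step 2 above: each value column arriving from a ReduceSink is copied, renamed to the join's output column name, and re-tagged with the join's table alias before being folded into the operator's RowSchema. The following is a minimal hedged sketch of that pattern, not taken from the Hive source; the column names, the alias "t1", and the int type are made up for illustration.

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ColumnInfoSketch {
    public static void main(String[] args) {
        ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
        // a value column as it might appear in the parent of the ReduceSink (made-up names)
        ColumnInfo parentCol = new ColumnInfo("VALUE._col0", TypeInfoFactory.intTypeInfo, "t1", false);
        // genJoin copies the parent ColumnInfo, then overwrites internal name and alias
        ColumnInfo info = new ColumnInfo(parentCol);
        info.setInternalName("_col0");    // the join's output column name
        info.setTabAlias("t1");           // corresponds to the tabAlias argument of genJoin
        outputColumns.add(info);
        // the schema that genJoin hands to OperatorFactory.getAndMakeChild in step 4
        RowSchema schema = new RowSchema(outputColumns);
        System.out.println(schema);
    }
}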
Also used: ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), HashSet(java.util.HashSet), Set(java.util.Set), HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin), HashMap(java.util.HashMap), LinkedHashMap(java.util.LinkedHashMap), ArrayList(java.util.ArrayList), JoinCond(org.apache.hadoop.hive.ql.parse.JoinCond), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), ImmutableList(com.google.common.collect.ImmutableList), List(java.util.List), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc), JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc), SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin), Join(org.apache.calcite.rel.core.Join), HiveAntiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin), HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin), JoinType(org.apache.hadoop.hive.ql.parse.JoinType), JoinRelType(org.apache.calcite.rel.core.JoinRelType), JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)

Example 87 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache: the class ExprProcFactory, method getExprString.

/**
 * Get the expression string of an expression node.
 */
public static String getExprString(RowSchema rs, ExprNodeDesc expr, LineageCtx lctx, Operator<? extends OperatorDesc> inpOp, Predicate cond) {
    if (expr instanceof ExprNodeColumnDesc) {
        ExprNodeColumnDesc col = (ExprNodeColumnDesc) expr;
        String internalName = col.getColumn();
        String alias = internalName;
        String tabAlias = col.getTabAlias();
        ColumnInfo ci = rs.getColumnInfo(internalName);
        if (ci != null) {
            if (ci.getAlias() != null) {
                alias = ci.getAlias();
            }
            if (ci.getTabAlias() != null) {
                tabAlias = ci.getTabAlias();
            }
        }
        Dependency dep = lctx.getIndex().getDependency(inpOp, internalName);
        if ((tabAlias == null || tabAlias.startsWith("_") || tabAlias.startsWith("$")) && (dep != null && dep.getType() == DependencyType.SIMPLE)) {
            Set<BaseColumnInfo> baseCols = dep.getBaseCols();
            if (baseCols != null && !baseCols.isEmpty()) {
                BaseColumnInfo baseCol = baseCols.iterator().next();
                tabAlias = baseCol.getTabAlias().getAlias();
                alias = baseCol.getColumn().getName();
            }
        }
        if (tabAlias != null && tabAlias.length() > 0 && !tabAlias.startsWith("_") && !tabAlias.startsWith("$")) {
            if (cond != null && !findSourceColumn(lctx, cond, tabAlias, alias) && dep != null) {
                cond.getBaseCols().addAll(dep.getBaseCols());
            }
            return tabAlias + "." + alias;
        }
        if (dep != null) {
            if (cond != null) {
                cond.getBaseCols().addAll(dep.getBaseCols());
            }
            if (dep.getExpr() != null) {
                return dep.getExpr();
            }
        }
        if (alias.startsWith("_")) {
            ci = inpOp.getSchema().getColumnInfo(internalName);
            if (ci != null && ci.getAlias() != null) {
                alias = ci.getAlias();
            }
        }
        return alias;
    } else if (expr instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) expr;
        List<ExprNodeDesc> children = func.getChildren();
        String[] childrenExprStrings = new String[children.size()];
        for (int i = 0; i < childrenExprStrings.length; i++) {
            childrenExprStrings[i] = getExprString(rs, children.get(i), lctx, inpOp, cond);
        }
        return func.getGenericUDF().getDisplayString(childrenExprStrings);
    }
    return expr.getExprString();
}
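
The function branch above builds the display string bottom-up: the children's strings are computed first, and the GenericUDF then renders itself around them. A small hedged illustration of just that last call; the child strings "t1.a" and "t1.b" are assumed to have been produced by the column branch.

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;

public class DisplayStringSketch {
    public static void main(String[] args) {
        GenericUDF udf = new GenericUDFConcat();
        // expression strings of the children, already resolved by recursive calls
        String[] childrenExprStrings = { "t1.a", "t1.b" };
        // the UDF renders itself around its children, e.g. concat(t1.a, t1.b)
        System.out.println(udf.getDisplayString(childrenExprStrings));
    }
}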
Also used: ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), BaseColumnInfo(org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo), ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), List(java.util.List), Dependency(org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency)

Example 88 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache: the class BucketingSortingOpProcFactory, method extractTraits.

static void extractTraits(BucketingSortingCtx bctx, ReduceSinkOperator rop, Operator<?> childop) throws SemanticException {
    List<ExprNodeDesc> outputValues = Collections.emptyList();
    if (childop instanceof SelectOperator) {
        SelectDesc select = ((SelectOperator) childop).getConf();
        outputValues = ExprNodeDescUtils.backtrack(select.getColList(), childop, rop);
    }
    if (outputValues.isEmpty()) {
        return;
    }
    // Go through the set of partition columns, and find their representatives in the values
    // These represent the bucketed columns
    List<BucketCol> bucketCols = extractBucketCols(rop, outputValues);
    // Go through the set of key columns, and find their representatives in the values
    // These represent the sorted columns
    List<SortCol> sortCols = extractSortCols(rop, outputValues);
    List<ColumnInfo> colInfos = childop.getSchema().getSignature();
    if (!bucketCols.isEmpty()) {
        List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
        bctx.setBucketedCols(childop, newBucketCols);
    }
    if (!sortCols.isEmpty()) {
        List<SortCol> newSortCols = getNewSortCols(sortCols, colInfos);
        bctx.setSortedCols(childop, newSortCols);
    }
}
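
Both extractBucketCols and extractSortCols scan the backtracked output values for plain column references to the ReduceSink's partition and key columns; a key column only counts as bucketed or sorted downstream if some output value carries it through unchanged. A hedged sketch of that "find the representative" idea, using a hypothetical helper that is not part of the Hive code:

import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public class RepresentativeSketch {
    // Returns the position of keyColumn among the backtracked output values,
    // or -1 if the column is not carried through to the child operator.
    static int findRepresentative(List<ExprNodeDesc> outputValues, String keyColumn) {
        for (int i = 0; i < outputValues.size(); i++) {
            ExprNodeDesc value = outputValues.get(i);
            if (value instanceof ExprNodeColumnDesc
                    && ((ExprNodeColumnDesc) value).getColumn().equals(keyColumn)) {
                return i;
            }
        }
        return -1;
    }
}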
Also used: SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), BucketCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol), SortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol), BucketSortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketSortCol), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 89 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache: the class ConstraintExprGenerator, method getNotNullConstraintExpr.

private T getNotNullConstraintExpr(Table targetTable, RowResolver inputRR, boolean isUpdateStatement) throws SemanticException {
    boolean forceNotNullConstraint = conf.getBoolVar(HiveConf.ConfVars.HIVE_ENFORCE_NOT_NULL_CONSTRAINT);
    if (!forceNotNullConstraint) {
        return null;
    }
    ImmutableBitSet nullConstraintBitSet;
    try {
        nullConstraintBitSet = getEnabledNotNullConstraints(targetTable);
    } catch (SemanticException e) {
        throw e;
    } catch (Exception e) {
        throw (new RuntimeException(e));
    }
    if (nullConstraintBitSet == null) {
        return null;
    }
    T currUDF = null;
    int constraintIdx = 0;
    List<ColumnInfo> inputColInfos = inputRR.getColumnInfos();
    for (int colExprIdx = 0; colExprIdx < inputColInfos.size(); colExprIdx++) {
        if (isUpdateStatement && colExprIdx == 0) {
            // for updates first column is _rowid
            continue;
        }
        if (nullConstraintBitSet.indexOf(constraintIdx) != -1) {
            T currExpr = typeCheckProcFactory.exprFactory.createColumnRefExpr(inputColInfos.get(colExprIdx), inputRR, 0);
            T isNotNullUDF = exprProcessor.getFuncExprNodeDesc("isnotnull", currExpr);
            if (currUDF != null) {
                currUDF = exprProcessor.getFuncExprNodeDesc("and", currUDF, isNotNullUDF);
            } else {
                currUDF = isNotNullUDF;
            }
        }
        constraintIdx++;
    }
    return currUDF;
}
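
The loop above effectively folds one isnotnull(...) call per constrained column into a single AND chain. A small hedged illustration of the same fold over plain strings; the column names c0..c2 are made up, and ImmutableBitSet.get is used here as the membership test that the quoted code performs with indexOf(...) != -1.

import org.apache.calcite.util.ImmutableBitSet;

public class NotNullChainSketch {
    public static void main(String[] args) {
        // NOT NULL enabled on columns 0 and 2
        ImmutableBitSet notNull = ImmutableBitSet.of(0, 2);
        String[] cols = { "c0", "c1", "c2" };
        String pred = null;
        for (int i = 0; i < cols.length; i++) {
            if (notNull.get(i)) {
                String isNotNull = "isnotnull(" + cols[i] + ")";
                // same fold as currUDF: the first constraint stands alone, later ones are ANDed in
                pred = (pred == null) ? isNotNull : "and(" + pred + ", " + isNotNull + ")";
            }
        }
        System.out.println(pred);  // and(isnotnull(c0), isnotnull(c2))
    }
}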
Also used: ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint), NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint)

Example 90 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache: the class TypeCheckProcFactory, method processGByExpr.

/**
 * Function to do group-by subexpression elimination. This is called by all the
 * processors initially. As an example, consider the query
 * select a+b, count(1) from T group by a+b. Since a+b is already precomputed
 * in the group-by operator's key, we substitute a+b in the select list with
 * the internal column name of the a+b expression that appears in the input
 * row resolver.
 *
 * @param nd      The node that is being inspected.
 * @param procCtx The processor context.
 * @return exprNodeColumnDesc.
 */
private T processGByExpr(Node nd, Object procCtx) throws SemanticException {
    // We recursively create the exprNodeDesc. Base cases: when we encounter a
    // column ref, we convert that into an exprNodeColumnDesc; when we encounter
    // a constant, we convert that into an exprNodeConstantDesc. For others we
    // just build the exprNodeFuncDesc with recursively built children.
    ASTNode expr = (ASTNode) nd;
    TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
    if (ctx == null) {
        return null;
    }
    // e.g. having key in (select .. where a = min(b.value))
    if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
        return null;
    }
    RowResolver input = ctx.getInputRR();
    T desc = null;
    if (input == null || !ctx.getAllowGBExprElimination()) {
        return null;
    }
    // If the current subExpression is pre-calculated, as in Group-By etc.
    ColumnInfo colInfo = input.getExpression(expr);
    RowResolver usedRR = input;
    int offset = 0;
    // try outer row resolver
    RowResolver outerRR = ctx.getOuterRR();
    if (colInfo == null && outerRR != null) {
        colInfo = outerRR.getExpression(expr);
        usedRR = outerRR;
        offset = input.getColumnInfos().size();
    }
    if (colInfo != null) {
        desc = exprFactory.createColumnRefExpr(colInfo, usedRR, offset);
        ASTNode source = input.getExpressionSource(expr);
        if (source != null && ctx.getUnparseTranslator() != null) {
            ctx.getUnparseTranslator().addCopyTranslation(expr, source);
        }
        return desc;
    }
    return desc;
}
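
Concretely, for select a+b, count(1) from T group by a+b, the group-by operator exposes the key a+b under an internal name such as _col0, and processGByExpr returns a plain column reference to that name instead of rebuilding the plus expression. A hedged sketch of the kind of descriptor that comes back; the internal name and type are hypothetical.

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GByElimSketch {
    public static void main(String[] args) {
        // roughly what exprFactory.createColumnRefExpr yields for the precomputed a+b key
        ExprNodeColumnDesc desc = new ExprNodeColumnDesc(
                TypeInfoFactory.intTypeInfo, "_col0", null, false);
        System.out.println(desc.getExprString());  // prints something like Column[_col0]
    }
}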
Also used: ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver)

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 225 usages
ArrayList (java.util.ArrayList): 140 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 138 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 100 usages
HashMap (java.util.HashMap): 93 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 86 usages
LinkedHashMap (java.util.LinkedHashMap): 71 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 59 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 48 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 47 usages
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 47 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 45 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 45 usages
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 45 usages
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 45 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 45 usages
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 45 usages
Map (java.util.Map): 41 usages
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 39 usages
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException): 38 usages