
Example 61 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class TezCompiler method markSemiJoinForDPP.

private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) throws SemanticException {
    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
        // Not needed without semi-join reduction
        return;
    }
    // Stores the Tablescan operators processed to avoid redoing them.
    Map<TableScanOperator, TableScanOperator> tsOps = new HashMap<>();
    Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo();
    for (ReduceSinkOperator rs : map.keySet()) {
        SemiJoinBranchInfo sjInfo = map.get(rs);
        TableScanOperator ts = sjInfo.getTsOp();
        TableScanOperator tsInMap = tsOps.putIfAbsent(ts, ts);
        if (tsInMap != null) {
            // Already processed, skip
            continue;
        }
        if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) {
            continue;
        }
        // A TS can have multiple branches due to DPP or semijoin optimization.
        // Use DFS to traverse all the branches until RS or DPP is hit.
        Deque<Operator<?>> deque = new LinkedList<>();
        deque.add(ts);
        while (!deque.isEmpty()) {
            Operator<?> op = deque.pollLast();
            if (op instanceof AppMasterEventOperator && ((AppMasterEventOperator) op).getConf() instanceof DynamicPruningEventDesc) {
                // DPP. Now look up nDVs on both sides to see the selectivity.
                // <Parent Ops>-SEL-GB1-RS1-GB2-RS2
                SelectOperator selOp = null;
                try {
                    selOp = (SelectOperator) (rs.getParentOperators().get(0).getParentOperators().get(0).getParentOperators().get(0).getParentOperators().get(0));
                } catch (NullPointerException e) {
                    LOG.warn("markSemiJoinForDPP : Null pointer exception caught while accessing semijoin operators");
                    assert false;
                    return;
                }
                try {
                    // If stats are not available, just assume it's a useful edge
                    Statistics stats = selOp.getStatistics();
                    ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(selOp.getConf().getColList().get(0));
                    long nDVs = stats.getColumnStatisticsFromColName(colExpr.getColumn()).getCountDistint();
                    if (nDVs > 0) {
                        // Lookup nDVs on TS side.
                        RuntimeValuesInfo rti = procCtx.parseContext.getRsToRuntimeValuesInfoMap().get(rs);
                        ExprNodeDesc tsExpr = rti.getTsColExpr();
                        FilterOperator fil = (FilterOperator) (ts.getChildOperators().get(0));
                        Statistics filStats = fil.getStatistics();
                        ExprNodeColumnDesc tsColExpr = ExprNodeDescUtils.getColumnExpr(tsExpr);
                        long nDVsOfTS = filStats.getColumnStatisticsFromColName(tsColExpr.getColumn()).getCountDistint();
                        double nDVsOfTSFactored = nDVsOfTS * procCtx.conf.getFloatVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_DPP_FACTOR);
                        if ((long) nDVsOfTSFactored > nDVs) {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("nDVs = " + nDVs + ", nDVsOfTS = " + nDVsOfTS + " and nDVsOfTSFactored = " + nDVsOfTSFactored + "Adding semijoin branch from ReduceSink " + rs + " to TS " + sjInfo.getTsOp());
                            }
                            sjInfo.setShouldRemove(false);
                        }
                    }
                } catch (NullPointerException e) {
                    sjInfo.setShouldRemove(false);
                }
                break;
            }
            if (op instanceof ReduceSinkOperator) {
                // Done with this branch
                continue;
            }
            deque.addAll(op.getChildOperators());
        }
    }
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) AnnotateWithStatistics(org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics) Statistics(org.apache.hadoop.hive.ql.plan.Statistics) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics) LinkedList(java.util.LinkedList) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
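The decision at the heart of markSemiJoinForDPP is a plain cardinality comparison between the two sides of the semijoin edge. As a minimal standalone sketch (hypothetical helper name and values, not Hive code), the test reduces to:

static boolean isSemiJoinBranchUseful(long nDVsOfSel, long nDVsOfTS, float dppFactor) {
    // A zero or unknown distinct count on the source side proves nothing,
    // so the branch stays removable (shouldRemove is left as-is above).
    if (nDVsOfSel <= 0) {
        return false;
    }
    // Mirrors the branch above: the semijoin is kept (setShouldRemove(false))
    // only when the factored distinct count on the TableScan side exceeds the
    // distinct count feeding the semijoin, i.e. the filter can actually prune.
    return (long) (nDVsOfTS * dppFactor) > nDVsOfSel;
}

In the real code, dppFactor comes from ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_DPP_FACTOR, and a NullPointerException while reading stats conservatively keeps the branch.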

Example 62 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class TypeCheckProcFactory method processGByExpr.

/**
 * Function to do group-by subexpression elimination. This is called by all the
 * processors initially. As an example, consider the query
 * select a+b, count(1) from T group by a+b;
 * here a+b is already precomputed in the group-by operator's key, so we
 * substitute a+b in the select list with the internal column name of the a+b
 * expression that appears in the input row resolver.
 *
 * @param nd
 *          The node that is being inspected.
 * @param procCtx
 *          The processor context.
 *
 * @return exprNodeColumnDesc.
 */
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) throws SemanticException {
    // We recursively create the exprNodeDesc. Base cases: when we encounter a
    // column ref, we convert it into an exprNodeColumnDesc; when we encounter a
    // constant, we convert it into an exprNodeConstantDesc. For everything else
    // we build an exprNodeFuncDesc with recursively built children.
    ASTNode expr = (ASTNode) nd;
    TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
    // Check ctx before dereferencing it (the original checked it only after use).
    if (ctx == null) {
        return null;
    }
    // having key in (select .. where a = min(b.value))
    if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
        return null;
    }
    RowResolver input = ctx.getInputRR();
    ExprNodeDesc desc = null;
    if ((input == null) || (!ctx.getAllowGBExprElimination())) {
        return null;
    }
    // If the current subExpression is pre-calculated, as in Group-By etc.
    ColumnInfo colInfo = input.getExpression(expr);
    // try outer row resolver
    RowResolver outerRR = ctx.getOuterRR();
    if (colInfo == null && outerRR != null) {
        colInfo = outerRR.getExpression(expr);
    }
    if (colInfo != null) {
        desc = new ExprNodeColumnDesc(colInfo);
        ASTNode source = input.getExpressionSource(expr);
        if (source != null && ctx.getUnparseTranslator() != null) {
            ctx.getUnparseTranslator().addCopyTranslation(expr, source);
        }
        return desc;
    }
    return desc;
}
Also used : ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
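To make the substitution concrete: for select a+b, count(1) from T group by a+b, the returned ExprNodeColumnDesc points at the group-by key's internal column rather than re-evaluating a+b. A hedged standalone sketch (the class name, internal column name "_col0", and null alias below are hypothetical):

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GByExprSubstitutionSketch {
    public static void main(String[] args) {
        // The group-by operator exposes the precomputed a+b under an internal
        // name such as "_col0"; the select list then references that column.
        ExprNodeDesc desc = new ExprNodeColumnDesc(
                TypeInfoFactory.intTypeInfo, "_col0", null, false);
        // Prints the column reference, e.g. Column[_col0].
        System.out.println(desc.getExprString());
    }
}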

Example 63 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class Vectorizer method validateExprNodeDescRecursive.

private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle, VectorExpressionDescriptor.Mode mode, boolean allowComplex) {
    if (desc instanceof ExprNodeColumnDesc) {
        ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
        String columnName = c.getColumn();
        if (availableVectorizedVirtualColumnSet != null) {
            // For Map, check for virtual columns.
            VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
            if (virtualColumn != null) {
                if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) {
                    setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported");
                    return false;
                }
                // Remember we used this one in the query.
                neededVirtualColumnSet.add(virtualColumn);
            }
        }
    }
    String typeName = desc.getTypeInfo().getTypeName();
    boolean ret = validateDataType(typeName, mode, allowComplex && isVectorizationComplexTypesEnabled);
    if (!ret) {
        setExpressionIssue(expressionTitle, getValidateDataTypeErrorMsg(typeName, mode, allowComplex, isVectorizationComplexTypesEnabled));
        return false;
    }
    boolean isInExpression = false;
    if (desc instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
        boolean r = validateGenericUdf(d);
        if (!r) {
            setExpressionIssue(expressionTitle, "UDF " + d + " not supported");
            return false;
        }
        GenericUDF genericUDF = d.getGenericUDF();
        isInExpression = (genericUDF instanceof GenericUDFIn);
    }
    if (desc.getChildren() != null) {
        if (isInExpression && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
            // Always use loose FILTER mode.
            if (!validateStructInExpression(desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) {
                return false;
            }
        } else {
            for (ExprNodeDesc d : desc.getChildren()) {
                // Always use loose FILTER mode.
                if (!validateExprNodeDescRecursive(d, expressionTitle, VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true)) {
                    return false;
                }
            }
        }
    }
    return true;
}
Also used : ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn)
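The recursion above is easier to see on a tiny tree. A hedged sketch (the class and helper names are hypothetical; the plan classes are real) that builds name = 'foo' and walks it the same child-first way the validator does:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ExprTreeWalkSketch {

    // Same traversal shape as validateExprNodeDescRecursive: inspect the
    // current node's type, then recurse into the children, if any.
    static void printTypes(ExprNodeDesc desc, String indent) {
        System.out.println(indent + desc.getClass().getSimpleName()
                + " : " + desc.getTypeInfo().getTypeName());
        if (desc.getChildren() != null) {
            for (ExprNodeDesc child : desc.getChildren()) {
                printTypes(child, indent + "  ");
            }
        }
    }

    public static void main(String[] args) throws Exception {
        ExprNodeDesc col = new ExprNodeColumnDesc(
                TypeInfoFactory.stringTypeInfo, "name", "t", false);
        ExprNodeDesc lit = new ExprNodeConstantDesc(
                TypeInfoFactory.stringTypeInfo, "foo");
        ExprNodeGenericFuncDesc eq = ExprNodeGenericFuncDesc.newInstance(
                new GenericUDFOPEqual(), Arrays.asList(col, lit));
        // boolean at the root, string at both leaves.
        printTypes(eq, "");
    }
}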

Example 64 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class PartitionPruner method removeNonPartCols.

/**
 * See compactExpr. Some things in the expr are replaced with nulls for pruner, however
 * the virtual columns are not removed (ExprNodeColumnDesc cannot tell them apart from
 * partition columns), so we do it here.
 * The expression is only used to prune by partition name, so we have no business with VCs.
 * @param expr original partition pruning expression.
 * @param partCols list of partition columns for the table.
 * @param referred partition columns referred by expr
 * @return partition pruning expression that only contains partition columns from the list.
 */
private static ExprNodeDesc removeNonPartCols(ExprNodeDesc expr, List<String> partCols, Set<String> referred) {
    if (expr instanceof ExprNodeFieldDesc) {
        // list or struct fields.
        return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
    } else if (expr instanceof ExprNodeColumnDesc) {
        String column = ((ExprNodeColumnDesc) expr).getColumn();
        if (!partCols.contains(column)) {
            // Column doesn't appear to be a partition column for the table.
            return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
        }
        referred.add(column);
    } else if (expr instanceof ExprNodeGenericFuncDesc) {
        List<ExprNodeDesc> children = expr.getChildren();
        for (int i = 0; i < children.size(); ++i) {
            ExprNodeDesc other = removeNonPartCols(children.get(i), partCols, referred);
            if (ExprNodeDescUtils.isNullConstant(other)) {
                if (FunctionRegistry.isOpAnd(expr)) {
                    // partcol=... AND nonpartcol=...  is replaced with partcol=... AND TRUE,
                    // which will be folded to partcol=...
                    // The same cannot be done for OR.
                    Preconditions.checkArgument(expr.getTypeInfo().accept(TypeInfoFactory.booleanTypeInfo));
                    other = new ExprNodeConstantDesc(expr.getTypeInfo(), true);
                } else {
                    // For anything other than AND, a null child can change the result
                    // and cause overaggressive pruning and missing data (incorrect
                    // results), so null out the whole subtree.
                    return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
                }
                }
            }
            children.set(i, other);
        }
    }
    return expr;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
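The effect on a concrete predicate: for a table partitioned by ds, the conjunct on a non-partition column is nulled out and then folded to TRUE under AND. A hedged sketch (the class name, column names, and value are hypothetical) that builds the post-rewrite expression with the same plan classes:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartPrunerRewriteSketch {
    public static void main(String[] args) throws Exception {
        // ds = '2023-01-01' survives: ds is a partition column (flag set true).
        ExprNodeDesc partPred = ExprNodeGenericFuncDesc.newInstance(
                new GenericUDFOPEqual(), Arrays.<ExprNodeDesc>asList(
                        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "ds", null, true),
                        new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "2023-01-01")));
        // The nonpartcol=... conjunct has already been replaced by TRUE, which
        // later constant folding reduces to the bare partition predicate.
        ExprNodeGenericFuncDesc rewritten = ExprNodeGenericFuncDesc.newInstance(
                new GenericUDFOPAnd(), Arrays.<ExprNodeDesc>asList(partPred,
                        new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, true)));
        System.out.println(rewritten.getExprString());
    }
}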

Example 65 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class DropPartitionHandler method genPartSpecs.

private Map<Integer, List<ExprNodeGenericFuncDesc>> genPartSpecs(Table table, List<Map<String, String>> partitions) throws SemanticException {
    Map<Integer, List<ExprNodeGenericFuncDesc>> partSpecs = new HashMap<>();
    int partPrefixLength = 0;
    if (partitions.size() > 0) {
        // Pick the length of the first ptn; we expect all ptns listed to have
        // the same number of key-vals.
        partPrefixLength = partitions.get(0).size();
    }
    List<ExprNodeGenericFuncDesc> partitionDesc = new ArrayList<>();
    for (Map<String, String> ptn : partitions) {
        // Convert each key-value map to an appropriate expression.
        ExprNodeGenericFuncDesc expr = null;
        for (Map.Entry<String, String> kvp : ptn.entrySet()) {
            String key = kvp.getKey();
            Object val = kvp.getValue();
            String type = table.getPartColByName(key).getType();
            PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(type);
            ExprNodeColumnDesc column = new ExprNodeColumnDesc(pti, key, null, true);
            ExprNodeGenericFuncDesc op = DDLSemanticAnalyzer.makeBinaryPredicate("=", column, new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, val));
            expr = (expr == null) ? op : DDLSemanticAnalyzer.makeBinaryPredicate("and", expr, op);
        }
        if (expr != null) {
            partitionDesc.add(expr);
        }
    }
    if (partitionDesc.size() > 0) {
        partSpecs.put(partPrefixLength, partitionDesc);
    }
    return partSpecs;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) List(java.util.List) Map(java.util.Map)
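A standalone version of the inner loop, with DDLSemanticAnalyzer.makeBinaryPredicate swapped for direct ExprNodeGenericFuncDesc.newInstance calls. The class name and partition spec are hypothetical, and string-typed partition columns are assumed (genPartSpecs looks the type up from the table instead):

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartSpecPredicateSketch {
    public static void main(String[] args) throws Exception {
        Map<String, String> ptn = new LinkedHashMap<>();
        ptn.put("ds", "2023-01-01");
        ptn.put("hr", "12");
        ExprNodeGenericFuncDesc expr = null;
        for (Map.Entry<String, String> kvp : ptn.entrySet()) {
            // Partition columns carry the isPartitionColOrVirtualCol flag.
            ExprNodeColumnDesc column = new ExprNodeColumnDesc(
                    TypeInfoFactory.stringTypeInfo, kvp.getKey(), null, true);
            // key = 'value', compared as strings just like genPartSpecs does.
            ExprNodeGenericFuncDesc op = ExprNodeGenericFuncDesc.newInstance(
                    new GenericUDFOPEqual(), Arrays.<ExprNodeDesc>asList(column,
                            new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, kvp.getValue())));
            // AND the per-key predicates together: (ds = '2023-01-01') and (hr = '12').
            expr = (expr == null) ? op : ExprNodeGenericFuncDesc.newInstance(
                    new GenericUDFOPAnd(), Arrays.<ExprNodeDesc>asList(expr, op));
        }
        System.out.println(expr.getExprString());
    }
}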

Aggregations

ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 186 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 168 usages
ArrayList (java.util.ArrayList): 110 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 98 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 89 usages
Test (org.junit.Test): 68 usages
HashMap (java.util.HashMap): 53 usages
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 49 usages
LinkedHashMap (java.util.LinkedHashMap): 35 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 34 usages
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 30 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 28 usages
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 26 usages
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 24 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 24 usages
DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression): 24 usages
List (java.util.List): 23 usages
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 22 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 22 usages
NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint): 22 usages