Search in sources :

Example 86 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TypeCheckProcFactory method processGByExpr.

/**
   * Function to do group-by subexpression elimination. This is called by all
   * the processors initially. As an example, consider the query select a+b,
   * count(1) from T group by a+b; a+b is already precomputed in the group-by
   * operator's key, so we substitute a+b in the select list with the internal
   * column name of the a+b expression that appears in the input row
   * resolver.
   *
   * @param nd
   *          The node that is being inspected.
   * @param procCtx
   *          The processor context.
   *
   * @return the exprNodeColumnDesc for the precomputed expression, or null if none.
   */
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) throws SemanticException {
    // We recursively create the exprNodeDesc. Base cases: when we encounter a
    // column ref, we convert it into an exprNodeColumnDesc; when we encounter a
    // constant, we convert it into an exprNodeConstantDesc. For others we just
    // build the exprNodeFuncDesc with recursively built children.
    ASTNode expr = (ASTNode) nd;
    TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
    if (ctx == null) {
        return null;
    }
    // Caching may be off, but the outer row resolver is still needed for
    // correlated subqueries, e.g. having key in (select .. where a = min(b.value))
    if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
        return null;
    }
    RowResolver input = ctx.getInputRR();
    ExprNodeDesc desc = null;
    if (input == null || !ctx.getAllowGBExprElimination()) {
        return null;
    }
    // Check whether the current subexpression was pre-calculated, e.g. as a group-by key.
    ColumnInfo colInfo = input.getExpression(expr);
    // try outer row resolver
    RowResolver outerRR = ctx.getOuterRR();
    if (colInfo == null && outerRR != null) {
        colInfo = outerRR.getExpression(expr);
    }
    if (colInfo != null) {
        desc = new ExprNodeColumnDesc(colInfo);
        ASTNode source = input.getExpressionSource(expr);
        if (source != null) {
            ctx.getUnparseTranslator().addCopyTranslation(expr, source);
        }
        return desc;
    }
    return desc;
}
Also used : ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
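
The substitution above only works because the group-by operator has already registered the a+b expression in the row resolver under an internal column name. Below is a minimal, self-contained sketch of that lookup-and-substitute idea in plain Java; the PRECOMPUTED map is a hypothetical stand-in for Hive's RowResolver, not the actual API:

import java.util.HashMap;
import java.util.Map;

// Simplified model of group-by subexpression elimination: if the group-by
// stage already computed an expression, later stages reference its internal
// column instead of re-evaluating it.
public class GByElimSketch {

    // Hypothetical stand-in for RowResolver's expression-to-column mapping.
    private static final Map<String, String> PRECOMPUTED = new HashMap<>();

    static String resolve(String exprText) {
        // Pre-calculated in the group-by key? Substitute the internal column.
        String internal = PRECOMPUTED.get(exprText);
        return internal != null ? internal : exprText; // otherwise evaluate as usual
    }

    public static void main(String[] args) {
        // select a+b, count(1) from T group by a+b
        PRECOMPUTED.put("(a + b)", "_col0"); // registered by the group-by operator
        System.out.println(resolve("(a + b)")); // prints _col0, not a recomputation
    }
}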

Example 87 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class SemanticAnalyzer method genReduceSinkPlan.

private Operator genReduceSinkPlan(String dest, QB qb, Operator<?> input, int numReducers, boolean hasOrderBy) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    // First generate the expressions for the partition and sort keys.
    // The CLUSTER BY / DISTRIBUTE BY clause supplies the expressions used as
    // partition keys.
    ASTNode partitionExprs = qb.getParseInfo().getClusterByForClause(dest);
    if (partitionExprs == null) {
        partitionExprs = qb.getParseInfo().getDistributeByForClause(dest);
    }
    ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
    if (partitionExprs != null) {
        int ccount = partitionExprs.getChildCount();
        for (int i = 0; i < ccount; ++i) {
            ASTNode cl = (ASTNode) partitionExprs.getChild(i);
            partCols.add(genExprNodeDesc(cl, inputRR));
        }
    }
    ASTNode sortExprs = qb.getParseInfo().getClusterByForClause(dest);
    if (sortExprs == null) {
        sortExprs = qb.getParseInfo().getSortByForClause(dest);
    }
    if (sortExprs == null) {
        sortExprs = qb.getParseInfo().getOrderByForClause(dest);
        if (sortExprs != null) {
            assert numReducers == 1;
            // in strict mode, in the presence of order by, limit must be specified
            if (qb.getParseInfo().getDestLimit(dest) == null) {
                String error = StrictChecks.checkNoLimit(conf);
                if (error != null) {
                    throw new SemanticException(generateErrorMessage(sortExprs, error));
                }
            }
        }
    }
    ArrayList<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
    StringBuilder order = new StringBuilder();
    StringBuilder nullOrder = new StringBuilder();
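    // Encoding used downstream: order holds '+'/'-' per sort key (ASC/DESC);
    // nullOrder holds 'a'/'z' per sort key (NULLS FIRST/NULLS LAST).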
    if (sortExprs != null) {
        int ccount = sortExprs.getChildCount();
        for (int i = 0; i < ccount; ++i) {
            ASTNode cl = (ASTNode) sortExprs.getChild(i);
            if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
                // SortBy ASC
                order.append("+");
                cl = (ASTNode) cl.getChild(0);
                if (cl.getType() == HiveParser.TOK_NULLS_FIRST) {
                    nullOrder.append("a");
                } else if (cl.getType() == HiveParser.TOK_NULLS_LAST) {
                    nullOrder.append("z");
                } else {
                    throw new SemanticException("Unexpected null ordering option: " + cl.getType());
                }
                cl = (ASTNode) cl.getChild(0);
            } else if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEDESC) {
                // SortBy DESC
                order.append("-");
                cl = (ASTNode) cl.getChild(0);
                if (cl.getType() == HiveParser.TOK_NULLS_FIRST) {
                    nullOrder.append("a");
                } else if (cl.getType() == HiveParser.TOK_NULLS_LAST) {
                    nullOrder.append("z");
                } else {
                    throw new SemanticException("Unexpected null ordering option: " + cl.getType());
                }
                cl = (ASTNode) cl.getChild(0);
            } else {
                // ClusterBy
                order.append("+");
                nullOrder.append("a");
            }
            ExprNodeDesc exprNode = genExprNodeDesc(cl, inputRR);
            sortCols.add(exprNode);
        }
    }
    Operator result = genReduceSinkPlan(input, partCols, sortCols, order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, true);
    if (result.getParentOperators().size() == 1 && result.getParentOperators().get(0) instanceof ReduceSinkOperator) {
        ((ReduceSinkOperator) result.getParentOperators().get(0)).getConf().setHasOrderBy(hasOrderBy);
    }
    return result;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
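
The single-character encodings built above are easy to miss: order collects '+'/'-' per key and nullOrder collects 'a'/'z'. Here is a self-contained sketch of just that encoding, assuming a hypothetical SortSpec record rather than Hive's AST types:

import java.util.List;

// Builds the order/null-order strings the reduce sink expects: one character
// per sort key, '+'/'-' for ASC/DESC and 'a'/'z' for NULLS FIRST/NULLS LAST.
public class SortOrderEncoding {

    // Hypothetical per-key sort specification (not a Hive class).
    record SortSpec(boolean ascending, boolean nullsFirst) {}

    static String[] encode(List<SortSpec> specs) {
        StringBuilder order = new StringBuilder();
        StringBuilder nullOrder = new StringBuilder();
        for (SortSpec s : specs) {
            order.append(s.ascending() ? "+" : "-");
            nullOrder.append(s.nullsFirst() ? "a" : "z");
        }
        return new String[] { order.toString(), nullOrder.toString() };
    }

    public static void main(String[] args) {
        // ORDER BY k1 ASC NULLS FIRST, k2 DESC NULLS LAST -> "+-" and "az"
        String[] enc = encode(List.of(new SortSpec(true, true), new SortSpec(false, false)));
        System.out.println(enc[0] + " " + enc[1]);
    }
}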

Example 88 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class SemanticAnalyzer method genGroupByPlanGroupByOperator2MR.

/**
   * Generate the second GroupByOperator for the Group By Plan
   * (parseInfo.getXXX(dest)). The new GroupByOperator will do the second
   * aggregation based on the partial aggregation results.
   *
   * @param mode
   *          the mode of aggregation (FINAL)
   * @param genericUDAFEvaluators
   *          The mapping from Aggregation StringTree to the
   *          genericUDAFEvaluator.
   * @return the new GroupByOperator
   * @throws SemanticException
   */
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo2, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, boolean groupingSetsPresent) throws SemanticException {
    RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRowResolver();
    RowResolver groupByOutputRowResolver2 = new RowResolver();
    groupByOutputRowResolver2.setIsExprResolver(true);
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr);
        if (exprInfo == null) {
            throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
        }
        String expression = exprInfo.getInternalName();
        groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression, exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
        groupByOutputRowResolver2.putExpression(grpbyExpr, oColInfo);
        addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    int groupingSetsPosition = -1;
    // For grouping sets, add a dummy grouping key
    if (groupingSetsPresent) {
        groupingSetsPosition = groupByKeys.size();
        addGroupingSetKey(groupByKeys, groupByInputRowResolver2, groupByOutputRowResolver2, outputColumnNames, colExprMap);
    }
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    boolean containsDistinctAggr = false;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        ASTNode value = entry.getValue();
        ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value);
        if (paraExprInfo == null) {
            throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
        }
        String paraExpression = paraExprInfo.getInternalName();
        assert (paraExpression != null);
        aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
        containsDistinctAggr = containsDistinctAggr || isDistinct;
        boolean isStar = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
        Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI), amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        groupByOutputRowResolver2.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver2.getColumnInfos()), reduceSinkOperatorInfo2), groupByOutputRowResolver2);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap)
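
The javadoc's key point is that mode FINAL never touches raw rows: it merges the partial aggregates produced by the first group-by stage. A toy two-phase count(1), shown as plain Java under the assumption that partials arrive pre-grouped (the real operator drives this through GenericUDAFEvaluator modes):

import java.util.List;

// Two-phase aggregation in miniature: stage 1 emits a partial count per
// group per task; stage 2 (the second GroupByOperator, mode FINAL) merges
// the partials instead of recounting rows.
public class TwoPhaseCountSketch {

    static long partialCount(List<Object> rows) {
        return rows.size(); // first stage: count rows seen by one task
    }

    static long finalMerge(List<Long> partials) {
        long total = 0;
        for (long p : partials) {
            total += p; // FINAL stage: sum partial counts, never re-scan rows
        }
        return total;
    }

    public static void main(String[] args) {
        // Three map tasks produced partial counts 10, 7, 3 for one group key.
        System.out.println(finalMerge(List.of(10L, 7L, 3L))); // 20
    }
}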

Example 89 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ReduceSinkOperator method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    try {
        numRows = 0;
        cntr = 1;
        logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
        statsMap.put(getCounterName(Counter.RECORDS_OUT_INTERMEDIATE, hconf), recordCounter);
        List<ExprNodeDesc> keys = conf.getKeyCols();
        if (isLogDebugEnabled) {
            LOG.debug("keys size is " + keys.size());
            for (ExprNodeDesc k : keys) {
                LOG.debug("Key exprNodeDesc " + k.getExprString());
            }
        }
        keyEval = new ExprNodeEvaluator[keys.size()];
        int i = 0;
        for (ExprNodeDesc e : keys) {
            if (e instanceof ExprNodeConstantDesc && (BUCKET_NUMBER_COL_NAME).equals(((ExprNodeConstantDesc) e).getValue())) {
                buckColIdxInKeyForSdpo = i;
            }
            keyEval[i++] = ExprNodeEvaluatorFactory.get(e);
        }
        numDistributionKeys = conf.getNumDistributionKeys();
        distinctColIndices = conf.getDistinctColumnIndices();
        numDistinctExprs = distinctColIndices.size();
        valueEval = new ExprNodeEvaluator[conf.getValueCols().size()];
        i = 0;
        for (ExprNodeDesc e : conf.getValueCols()) {
            valueEval[i++] = ExprNodeEvaluatorFactory.get(e);
        }
        partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
        i = 0;
        for (ExprNodeDesc e : conf.getPartitionCols()) {
            int index = ExprNodeDescUtils.indexOf(e, keys);
            partitionEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
        }
        if (conf.getBucketCols() != null && !conf.getBucketCols().isEmpty()) {
            bucketEval = new ExprNodeEvaluator[conf.getBucketCols().size()];
            i = 0;
            for (ExprNodeDesc e : conf.getBucketCols()) {
                int index = ExprNodeDescUtils.indexOf(e, keys);
                bucketEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
            }
            buckColIdxInKey = conf.getPartitionCols().size();
        }
        tag = conf.getTag();
        tagByte[0] = (byte) tag;
        skipTag = conf.getSkipTag();
        if (isLogInfoEnabled) {
            LOG.info("Using tag = " + tag);
        }
        TableDesc keyTableDesc = conf.getKeySerializeInfo();
        keySerializer = (Serializer) keyTableDesc.getDeserializerClass().newInstance();
        keySerializer.initialize(null, keyTableDesc.getProperties());
        keyIsText = keySerializer.getSerializedClass().equals(Text.class);
        TableDesc valueTableDesc = conf.getValueSerializeInfo();
        valueSerializer = (Serializer) valueTableDesc.getDeserializerClass().newInstance();
        valueSerializer.initialize(null, valueTableDesc.getProperties());
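        // Top-N (limit pushdown): when a LIMIT applies and memory is allotted,
        // hash-buffer only the best 'limit' keys instead of forwarding every row.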
        int limit = conf.getTopN();
        float memUsage = conf.getTopNMemoryUsage();
        if (limit >= 0 && memUsage > 0) {
            reducerHash = conf.isPTFReduceSink() ? new PTFTopNHash() : new TopNHash();
            reducerHash.initialize(limit, memUsage, conf.isMapGroupBy(), this, conf, hconf);
        }
        useUniformHash = conf.getReducerTraits().contains(UNIFORM);
        firstRow = true;
    } catch (Exception e) {
        String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
        LOG.error(msg, e);
        throw new RuntimeException(e);
    }
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) Text(org.apache.hadoop.io.Text) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
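
One detail worth calling out: for partition and bucket columns that also appear among the keys, initializeOp reuses the already-built key evaluator (found via ExprNodeDescUtils.indexOf) rather than constructing a duplicate. A stripped-down sketch of that reuse pattern, with a hypothetical Evaluator interface standing in for ExprNodeEvaluator:

import java.util.List;

// Reuse pattern from initializeOp: if an expression is already a key,
// point at the key's evaluator instead of building a second one.
public class EvaluatorReuseSketch {

    interface Evaluator { Object eval(Object row); }

    static Evaluator[] build(List<String> exprs, List<String> keys, Evaluator[] keyEval) {
        Evaluator[] out = new Evaluator[exprs.size()];
        for (int i = 0; i < exprs.size(); i++) {
            int idx = keys.indexOf(exprs.get(i)); // analogue of ExprNodeDescUtils.indexOf
            Evaluator fresh = row -> row;         // placeholder for a newly built evaluator
            out[i] = idx < 0 ? fresh : keyEval[idx];
        }
        return out;
    }
}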

Example 90 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class DynamicPartitionPruner method initialize.

private void initialize() throws SerDeException {
    this.clear();
    Map<String, SourceInfo> columnMap = new HashMap<String, SourceInfo>();
    // sources represent vertex names
    Set<String> sources = work.getEventSourceTableDescMap().keySet();
    sourcesWaitingForEvents.addAll(sources);
    for (String s : sources) {
        // Set to 0 to start with. This will be decremented once for each column for
        // which this source generates events, and is eventually used to determine the
        // number of expected events for the source: #columns X #tasks.
        numExpectedEventsPerSource.put(s, new MutableInt(0));
        numEventsSeenPerSource.put(s, new MutableInt(0));
        // Virtual relation generated by the reduce sink
        List<TableDesc> tables = work.getEventSourceTableDescMap().get(s);
        // Real column name - on which the operation is being performed
        List<String> columnNames = work.getEventSourceColumnNameMap().get(s);
        // Column type
        List<String> columnTypes = work.getEventSourceColumnTypeMap().get(s);
        // Expression for the operation. e.g. N^2 > 10
        List<ExprNodeDesc> partKeyExprs = work.getEventSourcePartKeyExprMap().get(s);
        // eventSourceTableDesc, eventSourceColumnName, eventSourcePartKeyExpr move in lock-step.
        // One entry is added to each at the same time
        Iterator<String> cit = columnNames.iterator();
        Iterator<String> typit = columnTypes.iterator();
        Iterator<ExprNodeDesc> pit = partKeyExprs.iterator();
        // A single source can process multiple columns, and will send an event for each of them.
        for (TableDesc t : tables) {
            numExpectedEventsPerSource.get(s).decrement();
            ++sourceInfoCount;
            String columnName = cit.next();
            String columnType = typit.next();
            ExprNodeDesc partKeyExpr = pit.next();
            SourceInfo si = createSourceInfo(t, partKeyExpr, columnName, columnType, jobConf);
            if (!sourceInfoMap.containsKey(s)) {
                sourceInfoMap.put(s, new ArrayList<SourceInfo>());
            }
            List<SourceInfo> sis = sourceInfoMap.get(s);
            sis.add(si);
            // Multiple sources may prune on the same column; share the existing
            // state so the result is the union of the values in that case.
            if (columnMap.containsKey(columnName)) {
                // All Sources are initialized up front. Events from different sources will end up getting added to the same list.
                // Pruning is disabled if either source sends in an event which causes pruning to be skipped
                si.values = columnMap.get(columnName).values;
                si.skipPruning = columnMap.get(columnName).skipPruning;
            }
            columnMap.put(columnName, si);
        }
    }
}
Also used : HashMap(java.util.HashMap) MutableInt(org.apache.commons.lang3.mutable.MutableInt) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
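
The comment about lists moving in lock-step is the crux: the per-source table descriptors, column names, column types, and partition-key expressions are parallel lists, and one next() per iterator keeps entry i of each aligned. A minimal sketch of that parallel iteration, with String standing in for the Hive descriptor types:

import java.util.Iterator;
import java.util.List;

// Lock-step walk over parallel lists: entry i of every list describes the
// same pruning target, so each loop turn consumes exactly one entry of each.
public class LockStepIteration {

    static void walk(List<String> tables, List<String> names, List<String> types) {
        Iterator<String> nameIt = names.iterator();
        Iterator<String> typeIt = types.iterator();
        for (String table : tables) {
            String name = nameIt.next(); // safe only if the lists were appended together
            String type = typeIt.next();
            System.out.println(table + ": " + name + " " + type);
        }
    }
}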

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 321
ArrayList (java.util.ArrayList) 179
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) 146
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) 110
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) 101
Test (org.junit.Test) 74
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) 69
HashMap (java.util.HashMap) 67
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema) 57
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 47
LinkedHashMap (java.util.LinkedHashMap) 43
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator) 42
List (java.util.List) 40
Operator (org.apache.hadoop.hive.ql.exec.Operator) 39
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 35
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator) 34
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator) 34
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 34
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) 33
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) 32