
Example 26 with SemanticException

Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

From class AnnotateRunTimeStatsOptimizer, method setRuntimeStatsDir.

private static void setRuntimeStatsDir(Operator<? extends OperatorDesc> op, ParseContext pctx) throws SemanticException {
    try {
        OperatorDesc conf = op.getConf();
        if (conf != null) {
            LOG.info("setRuntimeStatsDir for " + op.getOperatorId());
            String path = new Path(pctx.getContext().getExplainConfig().getExplainRootPath(), op.getOperatorId()).toString();
            StatsPublisher statsPublisher = new FSStatsPublisher();
            StatsCollectionContext runtimeStatsContext = new StatsCollectionContext(pctx.getConf());
            runtimeStatsContext.setStatsTmpDir(path);
            if (!statsPublisher.init(runtimeStatsContext)) {
                LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
                throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
            }
            conf.setRuntimeStatsTmpDir(path);
        } else {
            LOG.debug("skip setRuntimeStatsDir for " + op.getOperatorId() + " because OperatorDesc is null");
        }
    } catch (HiveException e) {
        throw new SemanticException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), StatsPublisher (org.apache.hadoop.hive.ql.stats.StatsPublisher), FSStatsPublisher (org.apache.hadoop.hive.ql.stats.fs.FSStatsPublisher), StatsCollectionContext (org.apache.hadoop.hive.ql.stats.StatsCollectionContext), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
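
The catch block above converts a checked HiveException into a SemanticException so callers that only declare SemanticException still see the underlying failure. A minimal sketch of that wrap-and-rethrow pattern outside Hive source (publishOrFail is a hypothetical stand-in for the statsPublisher.init(...) check):

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class WrapAndRethrowSketch {

    // Hypothetical helper standing in for the statsPublisher.init(...) check above.
    static void publishOrFail(boolean initialized) throws HiveException {
        if (!initialized) {
            throw new HiveException("StatsPublishing error: StatsPublisher is not initialized.");
        }
    }

    public static void main(String[] args) {
        try {
            publishOrFail(false);
        } catch (HiveException e) {
            // Same conversion as the catch block in setRuntimeStatsDir: the cause is preserved.
            SemanticException wrapped = new SemanticException(e);
            System.out.println("Wrapped cause: " + wrapped.getCause().getMessage());
        }
    }
}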

Example 27 with SemanticException

Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

From class HiveOpConverter, method genReduceSink.

@SuppressWarnings({ "rawtypes", "unchecked" })
private static ReduceSinkOperator genReduceSink(Operator<?> input, String tableAlias, ExprNodeDesc[] keys, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf) throws SemanticException {
    // dummy for backtracking
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(input));
    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
    // Compute join keys and store in reduceKeys
    for (ExprNodeDesc key : keys) {
        reduceKeys.add(key);
        reduceKeysBack.add(ExprNodeDescUtils.backtrack(key, dummy, input));
    }
    // Walk over the input schema and copy in the output
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ColumnInfo> inputColumns = input.getSchema().getSignature();
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    List<String> outputColumnNames = new ArrayList<String>();
    int[] index = new int[inputColumns.size()];
    for (int i = 0; i < inputColumns.size(); i++) {
        ColumnInfo colInfo = inputColumns.get(i);
        String outputColName = colInfo.getInternalName();
        ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when input is script operator
        ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, input);
        int kindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
        if (kindex >= 0) {
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setAlias(outputColName);
            newColInfo.setTabAlias(tableAlias);
            outputColumns.add(newColInfo);
            index[i] = kindex;
            continue;
        }
        int vindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
        if (vindex >= 0) {
            index[i] = -vindex - 1;
            continue;
        }
        index[i] = -reduceValues.size() - 1;
        reduceValues.add(expr);
        reduceValuesBack.add(exprBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setAlias(outputColName);
        newColInfo.setTabAlias(tableAlias);
        outputColumns.add(newColInfo);
        outputColumnNames.add(outputColName);
    }
    dummy.setParentOperators(null);
    // Use only 1 reducer if no reduce keys
    if (reduceKeys.size() == 0) {
        numReducers = 1;
        // Cartesian product is not supported in strict mode
        String error = StrictChecks.checkCartesian(hiveConf);
        if (error != null)
            throw new SemanticException(error);
    }
    ReduceSinkDesc rsDesc;
    if (order.isEmpty()) {
        rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, reduceKeys.size(), numReducers, acidOperation);
    } else {
        rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, partitionCols, order, nullOrder, numReducers, acidOperation);
    }
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputColumns), input);
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
    }
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    for (int i = 0; i < valColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
    }
    rsOp.setValueIndex(index);
    rsOp.setColumnExprMap(colExprMap);
    rsOp.setInputAliases(input.getSchema().getTableNames().toArray(new String[input.getSchema().getTableNames().size()]));
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + rsOp + " with row schema: [" + rsOp.getSchema() + "]");
    }
    return rsOp;
}
Also used: ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator), RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), ArrayList (java.util.ArrayList), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
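
The index array built above records, for every input column, where its value will come from after the shuffle: a non-negative entry is a position in the reduce keys (surfacing as KEY.reducesinkkeyN), while a negative entry encodes a reduce-value position as -(position) - 1. A minimal, dependency-free sketch of that encoding (class and method names are illustrative, not Hive APIs):

public class ValueIndexSketch {

    // Key columns keep their key position as-is.
    static int encodeKey(int keyPos) {
        return keyPos;
    }

    // Value columns are stored as -(position) - 1, so position 0 becomes -1, position 1 becomes -2.
    static int encodeValue(int valuePos) {
        return -valuePos - 1;
    }

    static String describe(int encoded) {
        return encoded >= 0
                ? "KEY.reducesinkkey" + encoded
                : "VALUE column at position " + (-encoded - 1);
    }

    public static void main(String[] args) {
        System.out.println(describe(encodeKey(2)));   // KEY.reducesinkkey2
        System.out.println(describe(encodeValue(0))); // VALUE column at position 0
    }
}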

Example 28 with SemanticException

Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

From class HiveOpConverter, method translateJoin.

private OpAttr translateJoin(RelNode joinRel) throws SemanticException {
    // 0. Additional data structures needed for the join optimization
    // through Hive
    String[] baseSrc = new String[joinRel.getInputs().size()];
    String tabAlias = getHiveDerivedTableAlias();
    // 1. Convert inputs
    OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
    List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = dispatch(joinRel.getInput(i));
        children.add(inputs[i].inputs.get(0));
        baseSrc[i] = inputs[i].tabAlias;
    }
    // 2. Generate tags
    for (int tag = 0; tag < children.size(); tag++) {
        ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) children.get(tag);
        reduceSinkOp.getConf().setTag(tag);
    }
    // 3. Virtual columns
    Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
    newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
    if (joinRel instanceof HiveMultiJoin || !(joinRel instanceof SemiJoin)) {
        int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
        for (int i = 1; i < inputs.length; i++) {
            newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
            shift += inputs[i].inputs.get(0).getSchema().getSignature().size();
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName() + " with row type: [" + joinRel.getRowType() + "]");
    }
    // 4. Extract join key expressions from HiveSortExchange
    ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][];
    for (int i = 0; i < inputs.length; i++) {
        joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getJoinExpressions();
    }
    // 5. Extract rest of join predicate info. We infer the rest of join condition
    //    that will be added to the filters (join conditions that are not part of
    //    the join key)
    List<RexNode> joinFilters;
    if (joinRel instanceof HiveJoin) {
        joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
    } else if (joinRel instanceof HiveMultiJoin) {
        joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
    } else if (joinRel instanceof HiveSemiJoin) {
        joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
    } else {
        throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
    }
    List<List<ExprNodeDesc>> filterExpressions = Lists.newArrayList();
    for (int i = 0; i < joinFilters.size(); i++) {
        List<ExprNodeDesc> filterExpressionsForInput = new ArrayList<ExprNodeDesc>();
        if (joinFilters.get(i) != null) {
            for (RexNode conj : RelOptUtil.conjunctions(joinFilters.get(i))) {
                ExprNodeDesc expr = convertToExprNode(conj, joinRel, null, newVcolsInCalcite);
                filterExpressionsForInput.add(expr);
            }
        }
        filterExpressions.add(filterExpressionsForInput);
    }
    // 6. Generate Join operator
    JoinOperator joinOp = genJoin(joinRel, joinExpressions, filterExpressions, children, baseSrc, tabAlias);
    // 7. Return result
    return new OpAttr(tabAlias, newVcolsInCalcite, joinOp);
}
Also used: ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator), HiveMultiJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin), ArrayList (java.util.ArrayList), HiveJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin), SemiJoin (org.apache.calcite.rel.core.SemiJoin), HiveSemiJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), HashSet (java.util.HashSet), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), RexNode (org.apache.calcite.rex.RexNode)
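
Step 3 above shifts the Calcite virtual-column positions of every join input after the first by the combined schema width of the inputs before it, so the positions stay valid in the joined row. A dependency-free sketch of that shift (the helper only mirrors what HiveCalciteUtil.shiftVColsSet is used for here, under that assumption):

import java.util.LinkedHashSet;
import java.util.Set;

public class VColShiftSketch {

    // Add a fixed offset to every column position; the offset is the total width of earlier inputs.
    static Set<Integer> shift(Set<Integer> cols, int offset) {
        Set<Integer> shifted = new LinkedHashSet<>();
        for (int c : cols) {
            shifted.add(c + offset);
        }
        return shifted;
    }

    public static void main(String[] args) {
        // If input 0 is 3 columns wide, a virtual column at position 0 of input 1 lands at position 3.
        System.out.println(shift(Set.of(0), 3)); // prints [3]
    }
}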

Example 29 with SemanticException

Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

From class HiveOpConverter, method visit.

OpAttr visit(HiveSortExchange exchangeRel) throws SemanticException {
    OpAttr inputOpAf = dispatch(exchangeRel.getInput());
    String tabAlias = inputOpAf.tabAlias;
    if (tabAlias == null || tabAlias.length() == 0) {
        tabAlias = getHiveDerivedTableAlias();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":" + exchangeRel.getRelTypeName() + " with row type: [" + exchangeRel.getRowType() + "]");
    }
    RelDistribution distribution = exchangeRel.getDistribution();
    if (distribution.getType() != Type.HASH_DISTRIBUTED) {
        throw new SemanticException("Only hash distribution supported for LogicalExchange");
    }
    ExprNodeDesc[] expressions = new ExprNodeDesc[exchangeRel.getJoinKeys().size()];
    for (int index = 0; index < exchangeRel.getJoinKeys().size(); index++) {
        expressions[index] = convertToExprNode(exchangeRel.getJoinKeys().get(index), exchangeRel.getInput(), inputOpAf.tabAlias, inputOpAf);
    }
    exchangeRel.setJoinExpressions(expressions);
    ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), tabAlias, expressions, -1, -1, Operation.NOT_ACID, hiveConf);
    return new OpAttr(tabAlias, inputOpAf.vcolsInCalcite, rsOp);
}
Also used: ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), RelDistribution (org.apache.calcite.rel.RelDistribution), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
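
The visit method above rejects any exchange that is not hash-distributed before it builds the ReduceSinkOperator. A minimal sketch of that precondition pulled into a helper, assuming a Hive/Calcite classpath (the helper name is illustrative, not a Hive API):

import org.apache.calcite.rel.RelDistribution;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class DistributionCheckSketch {

    // Fail fast with the same message used in visit(HiveSortExchange) above.
    static void requireHashDistributed(RelDistribution distribution) throws SemanticException {
        if (distribution.getType() != RelDistribution.Type.HASH_DISTRIBUTED) {
            throw new SemanticException("Only hash distribution supported for LogicalExchange");
        }
    }
}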

Example 30 with SemanticException

Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

From class RewriteQueryUsingAggregateIndexCtx, method replaceGroupByOperatorProcess.

/**
   * We need to replace the count(indexed_column_key) GenericUDAF aggregation
   * function in the group-by construct with the "sum" GenericUDAF. This method
   * creates a new operator tree for a sample query that builds a GroupByOperator
   * with the sum aggregation function, and uses that GroupByOperator's
   * information to replace the original GroupByOperator's aggregation
   * information. It replaces the AggregationDesc (aggregation descriptor) of the
   * old GroupByOperator with the new AggregationDesc of the new GroupByOperator.
   */
private void replaceGroupByOperatorProcess(GroupByOperator operator, int index) throws SemanticException {
    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
    // We need to replace the GroupByOperator which is before RS
    if (index == 0) {
        // the query contains the sum aggregation GenericUDAF
        String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)" + " from `" + rewriteQueryCtx.getIndexName() + "` group by " + rewriteQueryCtx.getIndexKey() + " ";
        // retrieve the operator tree for the query, and the required GroupByOperator from it
        Operator<?> newOperatorTree = RewriteParseContextGenerator.generateOperatorTree(rewriteQueryCtx.getParseContext().getQueryState(), selReplacementCommand);
        // we get our new GroupByOperator here
        GroupByOperator newGbyOperator = OperatorUtils.findLastOperatorUpstream(newOperatorTree, GroupByOperator.class);
        if (newGbyOperator == null) {
            throw new SemanticException("Error replacing GroupBy operator.");
        }
        // we need this information to set the correct colList, outputColumnNames
        // in SelectOperator
        ExprNodeColumnDesc aggrExprNode = null;
        // Construct the new AggregationDesc to get rid of the current
        // internal names and replace them with new internal names
        // as required by the operator tree
        GroupByDesc newConf = newGbyOperator.getConf();
        List<AggregationDesc> newAggrList = newConf.getAggregators();
        if (newAggrList != null && newAggrList.size() > 0) {
            for (AggregationDesc aggregationDesc : newAggrList) {
                rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
                aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
                rewriteQueryCtx.setAggrExprNode(aggrExprNode);
            }
        }
        // Now the GroupByOperator has the new AggregationList;
        // sum(`_count_of_indexed_key`)
        // instead of count(indexed_key)
        GroupByDesc oldConf = operator.getConf();
        oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
        operator.setConf(oldConf);
    } else {
        // we just need to reset the GenericUDAFEvaluator and its name for this
        // GroupByOperator whose parent is the ReduceSinkOperator
        GroupByDesc childConf = operator.getConf();
        List<AggregationDesc> childAggrList = childConf.getAggregators();
        if (childAggrList != null && childAggrList.size() > 0) {
            for (AggregationDesc aggregationDesc : childAggrList) {
                List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
                List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
                for (ExprNodeDesc expr : paraList) {
                    parametersOIList.add(expr.getWritableObjectInspector());
                }
                GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator("sum", parametersOIList, false, false);
                aggregationDesc.setGenericUDAFEvaluator(evaluator);
                aggregationDesc.setGenericUDAFName("sum");
            }
        }
    }
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator), GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator), ArrayList (java.util.ArrayList), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
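
The else branch above rebinds each AggregationDesc from count to sum by resolving a fresh "sum" evaluator against the aggregation's parameter object inspectors. A minimal sketch of that rebinding step pulled into a helper, assuming a Hive classpath (the class and helper names are illustrative, not Hive APIs):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

public class RebindToSumSketch {

    static void rebindToSum(AggregationDesc aggregationDesc) throws SemanticException {
        // Collect the object inspectors of the aggregation's parameters, as in the snippet above.
        List<ObjectInspector> parameterOIs = new ArrayList<>();
        for (ExprNodeDesc expr : aggregationDesc.getParameters()) {
            parameterOIs.add(expr.getWritableObjectInspector());
        }
        // Same FunctionRegistry call as above: "sum", not distinct, not all-columns.
        GenericUDAFEvaluator evaluator =
                FunctionRegistry.getGenericUDAFEvaluator("sum", parameterOIs, false, false);
        aggregationDesc.setGenericUDAFEvaluator(evaluator);
        aggregationDesc.setGenericUDAFName("sum");
    }
}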

Aggregations

SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 91
ArrayList (java.util.ArrayList): 47
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 30
HashMap (java.util.HashMap): 24
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 21
LinkedHashMap (java.util.LinkedHashMap): 17
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 16
Operator (org.apache.hadoop.hive.ql.exec.Operator): 14
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 14
List (java.util.List): 13
Path (org.apache.hadoop.fs.Path): 13
Node (org.apache.hadoop.hive.ql.lib.Node): 13
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 13
IOException (java.io.IOException): 12
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 12
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 12
Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher): 12
GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker): 12
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 12
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 11