Example 26 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class GroupByOperator method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    numRowsInput = 0;
    numRowsHashTbl = 0;
    heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
    countAfterReport = 0;
    groupingSetsPresent = conf.isGroupingSetsPresent();
    ObjectInspector rowInspector = inputObjInspectors[0];
    // init keyFields
    int numKeys = conf.getKeys().size();
    keyFields = new ExprNodeEvaluator[numKeys];
    keyObjectInspectors = new ObjectInspector[numKeys];
    currentKeyObjectInspectors = new ObjectInspector[numKeys];
    for (int i = 0; i < numKeys; i++) {
        keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf);
        keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
        currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(keyObjectInspectors[i], ObjectInspectorCopyOption.WRITABLE);
    }
    // initialize the constants for the grouping sets so they can be reused for each row
    if (groupingSetsPresent) {
        groupingSets = conf.getListGroupingSets();
        groupingSetsPosition = conf.getGroupingSetPosition();
        newKeysGroupingSets = new IntWritable[groupingSets.size()];
        groupingSetsBitSet = new FastBitSet[groupingSets.size()];
        int pos = 0;
        for (Integer groupingSet : groupingSets) {
            // Create the mapping corresponding to the grouping set
            newKeysGroupingSets[pos] = new IntWritable(groupingSet);
            groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition);
            pos++;
        }
    }
    // initialize unionExpr for reduce-side
    // reduce KEY has union field as the last field if there are distinct
    // aggregates in group-by.
    List<? extends StructField> sfs = ((StructObjectInspector) rowInspector).getAllStructFieldRefs();
    if (sfs.size() > 0) {
        StructField keyField = sfs.get(0);
        if (keyField.getFieldName().toUpperCase().equals(Utilities.ReduceField.KEY.name())) {
            ObjectInspector keyObjInspector = keyField.getFieldObjectInspector();
            if (keyObjInspector instanceof StructObjectInspector) {
                List<? extends StructField> keysfs = ((StructObjectInspector) keyObjInspector).getAllStructFieldRefs();
                if (keysfs.size() > 0) {
                    // the last field is the union field, if any
                    StructField sf = keysfs.get(keysfs.size() - 1);
                    if (sf.getFieldObjectInspector().getCategory().equals(ObjectInspector.Category.UNION)) {
                        unionExprEval = ExprNodeEvaluatorFactory.get(
                            new ExprNodeColumnDesc(
                                TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()),
                                keyField.getFieldName() + "." + sf.getFieldName(), null, false),
                            hconf);
                        unionExprEval.initialize(rowInspector);
                    }
                }
            }
        }
    }
    // init aggregationParameterFields
    ArrayList<AggregationDesc> aggrs = conf.getAggregators();
    aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][];
    aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][];
    aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][];
    aggregationParameterObjects = new Object[aggrs.size()][];
    aggregationIsDistinct = new boolean[aggrs.size()];
    for (int i = 0; i < aggrs.size(); i++) {
        AggregationDesc aggr = aggrs.get(i);
        ArrayList<ExprNodeDesc> parameters = aggr.getParameters();
        aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
        aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
        aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()];
        aggregationParameterObjects[i] = new Object[parameters.size()];
        for (int j = 0; j < parameters.size(); j++) {
            aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j), hconf);
            aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j].initialize(rowInspector);
            if (unionExprEval != null) {
                String[] names = parameters.get(j).getExprString().split("\\.");
                // parameters of the form : KEY.colx:t.coly
                if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
                    String name = names[names.length - 2];
                    int tag = Integer.parseInt(name.split("\\:")[1]);
                    if (aggr.getDistinct()) {
                        // is distinct
                        Set<Integer> set = distinctKeyAggrs.get(tag);
                        if (null == set) {
                            set = new HashSet<Integer>();
                            distinctKeyAggrs.put(tag, set);
                        }
                        if (!set.contains(i)) {
                            set.add(i);
                        }
                    } else {
                        Set<Integer> set = nonDistinctKeyAggrs.get(tag);
                        if (null == set) {
                            set = new HashSet<Integer>();
                            nonDistinctKeyAggrs.put(tag, set);
                        }
                        if (!set.contains(i)) {
                            set.add(i);
                        }
                    }
                } else {
                    // will be KEY._COLx or VALUE._COLx
                    if (!nonDistinctAggrs.contains(i)) {
                        nonDistinctAggrs.add(i);
                    }
                }
            } else {
                if (aggr.getDistinct()) {
                    aggregationIsDistinct[i] = true;
                }
            }
            aggregationParameterStandardObjectInspectors[i][j] = ObjectInspectorUtils.getStandardObjectInspector(aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE);
            aggregationParameterObjects[i][j] = null;
        }
        if (parameters.size() == 0) {
            // for ex: count(*)
            if (!nonDistinctAggrs.contains(i)) {
                nonDistinctAggrs.add(i);
            }
        }
    }
    // init aggregationClasses
    aggregationEvaluators = new GenericUDAFEvaluator[conf.getAggregators().size()];
    for (int i = 0; i < aggregationEvaluators.length; i++) {
        AggregationDesc agg = conf.getAggregators().get(i);
        aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
    }
    MapredContext context = MapredContext.get();
    if (context != null) {
        for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
            context.setup(genericUDAFEvaluator);
        }
    }
    // grouping id should be pruned, which is the last of key columns
    // see ColumnPrunerGroupByProc
    outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length;
    // init objectInspectors
    ObjectInspector[] objectInspectors = new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
    for (int i = 0; i < outputKeyLength; i++) {
        objectInspectors[i] = currentKeyObjectInspectors[i];
    }
    for (int i = 0; i < aggregationEvaluators.length; i++) {
        objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(conf.getAggregators().get(i).getMode(), aggregationParameterObjectInspectors[i]);
    }
    aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
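    // use sorted-input (non-hash) aggregation when the plan is not in HASH mode or
    // bucket grouping applies, and no grouping sets are present; otherwise hash-aggregate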
    if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) && (!groupingSetsPresent)) {
        aggregations = newAggregations();
        hashAggr = false;
    } else {
        hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
        aggregations = newAggregations();
        hashAggr = true;
        keyPositionsSize = new ArrayList<Integer>();
        aggrPositions = new List[aggregations.length];
        groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
        // compare every groupbyMapAggrInterval rows
        numRowsCompareHashAggr = groupbyMapAggrInterval;
        minReductionHashAggr = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    }
    List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
    outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, Arrays.asList(objectInspectors));
    KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
    newKeys = keyWrapperFactory.getKeyWrapper();
    isTez = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
    isLlap = isTez && HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap");
    numExecutors = isLlap ? HiveConf.getIntVar(hconf, HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS) : 1;
    firstRow = true;
    // estimate the number of hash table entries based on the size of each entry;
    // since the size of an entry is not known, estimate that based on the number of entries
    if (hashAggr) {
        computeMaxEntriesHashAggr();
    }
    memoryMXBean = ManagementFactory.getMemoryMXBean();
    maxMemory = isTez ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax();
    memoryThreshold = this.getConf().getMemoryThreshold();
    LOG.info("isTez: {} isLlap: {} numExecutors: {} maxMemory: {}", isTez, isLlap, numExecutors, maxMemory);
}
Also used : GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IntWritable(org.apache.hadoop.io.IntWritable) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) LazyBinaryObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
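
The keyFields loop above shows the evaluator pattern that recurs throughout these examples: describe the expression with an ExprNodeDesc (here an ExprNodeColumnDesc), obtain an ExprNodeEvaluator from the factory, and initialize it against the row's ObjectInspector before evaluating rows. Below is a minimal standalone sketch of that pattern, assuming a one-column row layout (the column name "col1" and the string type are illustrative, not taken from the example above).

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ColumnEvaluatorSketch {
    public static void main(String[] args) throws Exception {
        // Row type struct<col1:string>, standing in for rowInspector above (assumed layout).
        StructObjectInspector rowInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("col1"),
                Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaStringObjectInspector));
        // Same three steps as the keyFields loop: desc -> evaluator -> initialize.
        ExprNodeColumnDesc colDesc = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col1", null, false);
        ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(colDesc);
        ObjectInspector outputOI = eval.initialize(rowInspector);
        // Rows for a standard struct ObjectInspector are plain lists.
        Object value = eval.evaluate(Arrays.<Object>asList("hello"));
        System.out.println(outputOI.getTypeName() + ": " + value);
    }
}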

Example 27 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class VectorizationContext method getIdentityExpression.

/**
   * Used as a fast path for operations that don't modify their input, like unary +
   * and casting boolean to long. IdentityExpression and its children are always
   * projections.
   */
private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList) throws HiveException {
    ExprNodeDesc childExpr = childExprList.get(0);
    int inputCol;
    String colType;
    VectorExpression v1 = null;
    if (childExpr instanceof ExprNodeGenericFuncDesc) {
        v1 = getVectorExpression(childExpr);
        inputCol = v1.getOutputColumn();
        colType = v1.getOutputType();
    } else if (childExpr instanceof ExprNodeColumnDesc) {
        ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
        inputCol = getInputColumnIndex(colDesc.getColumn());
        colType = colDesc.getTypeString();
    } else {
        throw new HiveException("Expression not supported: " + childExpr);
    }
    VectorExpression expr = new IdentityExpression(inputCol, colType);
    if (v1 != null) {
        expr.setChildExpressions(new VectorExpression[] { v1 });
    }
    return expr;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VectorUDAFMaxString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString) VectorUDAFMinString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString)
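
Since getIdentityExpression is private to VectorizationContext, a direct call cannot be shown, but the fast path's effect is easy to see: an IdentityExpression reports its input column and type as its output, so no vector data is copied. A small sketch (the column index 3 and the type string "bigint" are assumptions):

import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;

public class IdentitySketch {
    public static void main(String[] args) {
        // Identity projection over assumed input column 3 of type bigint.
        IdentityExpression expr = new IdentityExpression(3, "bigint");
        System.out.println(expr.getOutputColumn()); // 3 -- the input column is passed through
        System.out.println(expr.getOutputType());   // bigint
    }
}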

Example 28 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class ConvertAstToSearchArg method findVariable.

/**
   * Find the variable in the expression.
   * @param expr the expression to look in
   * @return the index of the variable or -1 if there is not exactly one
   *   variable.
   */
private int findVariable(ExprNodeDesc expr) {
    int result = -1;
    List<ExprNodeDesc> children = expr.getChildren();
    for (int i = 0; i < children.size(); ++i) {
        ExprNodeDesc child = children.get(i);
        if (child instanceof ExprNodeColumnDesc) {
            // if we already found a variable, this isn't a sarg
            if (result != -1) {
                return -1;
            } else {
                result = i;
            }
        }
    }
    return result;
}
Also used : ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
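
findVariable is private to ConvertAstToSearchArg, so the sketch below restates its logic in self-contained form and exercises it on two assumed predicates: x = 10 (exactly one column child, so its index 0 is returned) and x = x (two column children, so -1):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class FindVariableSketch {
    // Same logic as findVariable above, restated so it can run standalone.
    static int findVariable(ExprNodeDesc expr) {
        int result = -1;
        List<ExprNodeDesc> children = expr.getChildren();
        for (int i = 0; i < children.size(); ++i) {
            if (children.get(i) instanceof ExprNodeColumnDesc) {
                if (result != -1) {
                    return -1; // a second column means this is not a sarg leaf
                }
                result = i;
            }
        }
        return result;
    }

    public static void main(String[] args) {
        ExprNodeDesc x = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "x", null, false);
        ExprNodeDesc ten = new ExprNodeConstantDesc(10);
        ExprNodeDesc xEqTen = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
                new GenericUDFOPEqual(), Arrays.asList(x, ten));
        ExprNodeDesc xEqX = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
                new GenericUDFOPEqual(), Arrays.asList(x, x));
        System.out.println(findVariable(xEqTen)); // 0: the single column is child 0
        System.out.println(findVariable(xEqX));   // -1: more than one column child
    }
}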

Example 29 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class ConvertAstToSearchArg method parse.

/**
   * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree.
   * @param expression the Hive ExprNodeDesc
   */
private void parse(ExprNodeDesc expression) {
    // handle the special cases.
    if (expression.getClass() != ExprNodeGenericFuncDesc.class) {
        // if it is a reference to a boolean column, convert it to a truth test.
        if (expression instanceof ExprNodeColumnDesc) {
            ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) expression;
            if (columnDesc.getTypeString().equals("boolean")) {
                builder.equals(columnDesc.getColumn(), PredicateLeaf.Type.BOOLEAN, true);
                return;
            }
        }
        // otherwise, we don't know what to do so make it a maybe
        builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
        return;
    }
    // get the kind of expression
    ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression;
    Class<?> op = expr.getGenericUDF().getClass();
    // handle the logical operators
    if (op == GenericUDFOPOr.class) {
        builder.startOr();
        addChildren(expr);
        builder.end();
    } else if (op == GenericUDFOPAnd.class) {
        builder.startAnd();
        addChildren(expr);
        builder.end();
    } else if (op == GenericUDFOPNot.class) {
        builder.startNot();
        addChildren(expr);
        builder.end();
    } else if (op == GenericUDFOPEqual.class) {
        createLeaf(PredicateLeaf.Operator.EQUALS, expr);
    } else if (op == GenericUDFOPNotEqual.class) {
        builder.startNot();
        createLeaf(PredicateLeaf.Operator.EQUALS, expr);
        builder.end();
    } else if (op == GenericUDFOPEqualNS.class) {
        createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr);
    } else if (op == GenericUDFOPGreaterThan.class) {
        builder.startNot();
        createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
        builder.end();
    } else if (op == GenericUDFOPEqualOrGreaterThan.class) {
        builder.startNot();
        createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
        builder.end();
    } else if (op == GenericUDFOPLessThan.class) {
        createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
    } else if (op == GenericUDFOPEqualOrLessThan.class) {
        createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
    } else if (op == GenericUDFIn.class) {
        createLeaf(PredicateLeaf.Operator.IN, expr, 0);
    } else if (op == GenericUDFBetween.class) {
        createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
    } else if (op == GenericUDFOPNull.class) {
        createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
    } else if (op == GenericUDFOPNotNull.class) {
        builder.startNot();
        createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
        builder.end();
    // otherwise, we didn't understand it, so mark it maybe
    } else {
        builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
    }
}
Also used : GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) GenericUDFOPEqualNS(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFOPEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) GenericUDFOPNotNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull)
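
PredicateLeaf defines no greater-than operators, which is why the GenericUDFOPGreaterThan and GenericUDFOPEqualOrGreaterThan branches above wrap a LESS_THAN_EQUALS or LESS_THAN leaf in NOT. A minimal sketch of the SearchArgument the greater-than branch effectively builds for an assumed predicate a > 10:

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class SargSketch {
    public static void main(String[] args) {
        // a > 10 becomes NOT (a <= 10), mirroring the GenericUDFOPGreaterThan branch.
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
                .startNot()
                .lessThanEquals("a", PredicateLeaf.Type.LONG, 10L)
                .end()
                .build();
        System.out.println(sarg); // a single LESS_THAN_EQUALS leaf wrapped in (not ...)
    }
}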

Example 30 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class MapJoinProcessor method genSelectPlan.

protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
    List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
    input.setChildOperators(null);
    // create a dummy select - This select is needed by the walker to split the
    // mapJoin later on
    RowSchema inputRS = input.getSchema();
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputs = new ArrayList<String>();
    List<String> outputCols = input.getConf().getOutputColumnNames();
    ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < outputCols.size(); i++) {
        String internalName = outputCols.get(i);
        ColumnInfo valueInfo = inputRS.getColumnInfo(internalName);
        ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
        exprs.add(colDesc);
        outputs.add(internalName);
        ColumnInfo newCol = new ColumnInfo(internalName, valueInfo.getType(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol());
        newCol.setAlias(valueInfo.getAlias());
        outputRS.add(newCol);
        colExprMap.put(internalName, colDesc);
    }
    SelectDesc select = new SelectDesc(exprs, outputs, false);
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), input);
    sel.setColumnExprMap(colExprMap);
    // Insert the select operator in between.
    sel.setChildOperators(childOps);
    for (Operator<? extends OperatorDesc> ch : childOps) {
        ch.replaceParent(input, sel);
    }
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
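
Each pass through the loop above turns one ColumnInfo from the join's RowSchema into an identity ExprNodeColumnDesc keyed by its internal name. A standalone sketch of that single step (the internal name "_col0", table alias "t", and string type are assumptions):

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class IdentityProjectionSketch {
    public static void main(String[] args) {
        // One column of the (assumed) join output schema.
        ColumnInfo valueInfo = new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false);
        // Identical to the colDesc construction in the loop above.
        ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(),
                valueInfo.getInternalName(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
        System.out.println(colDesc.getExprString()); // prints the internal column name
    }
}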

Aggregations

ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 161
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 145
ArrayList (java.util.ArrayList): 93
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 88
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 78
Test (org.junit.Test): 65
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 43
HashMap (java.util.HashMap): 40
LinkedHashMap (java.util.LinkedHashMap): 30
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 28
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 25
DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression): 24
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 22
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 21
Operator (org.apache.hadoop.hive.ql.exec.Operator): 19
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 19
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 19
List (java.util.List): 17
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 17
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 17