Example 56 with ExprNodeColumnDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

From the class SemanticAnalyzer, method genGroupByPlanMapGroupByOperator.

/**
 * Generate the map-side GroupByOperator for the Query Block
 * (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
 * of the inputOperatorInfo.
 *
 * @param mode
 *          The mode of the aggregation (HASH)
 * @param genericUDAFEvaluators
 *          If not null, this function will store the mapping from Aggregation
 *          StringTree to the genericUDAFEvaluator in this parameter, so it
 *          can be used in the next-stage GroupBy aggregations.
 * @return the new GroupByOperator
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, List<ASTNode> grpByExprs,
        Operator inputOperatorInfo, GroupByDesc.Mode mode,
        Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
        List<Long> groupingSetKeys, boolean groupingSetsPresent) throws SemanticException {
    RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    QBParseInfo parseInfo = qb.getParseInfo();
    RowResolver groupByOutputRowResolver = new RowResolver();
    groupByOutputRowResolver.setIsExprResolver(true);
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, groupByInputRowResolver);
        if ((grpByExprNode instanceof ExprNodeColumnDesc) && ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
            // Skip duplicated grouping keys; this happens when a column alias is defined.
            grpByExprs.remove(i--);
            continue;
        }
        groupByKeys.add(grpByExprNode);
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        groupByOutputRowResolver.putExpression(grpbyExpr, new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    // The grouping set key is present after the grouping keys, before the distinct keys.
    int groupingSetsPosition = -1;
    // If grouping sets are present (e.g. for a rollup), add a dummy grouping-set key.
    if (groupingSetsPresent) {
        groupingSetsPosition = groupByKeys.size();
        createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
    }
    // If there is a distinctFuncExp, add all of its parameters to the group-by keys.
    if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
        List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest);
        for (ASTNode value : list) {
            // 0 is the function name
            for (int i = 1; i < value.getChildCount(); i++) {
                ASTNode parameter = (ASTNode) value.getChild(i);
                if (groupByOutputRowResolver.getExpression(parameter) == null) {
                    ExprNodeDesc distExprNode = genExprNodeDesc(parameter, groupByInputRowResolver);
                    groupByKeys.add(distExprNode);
                    String field = getColumnInternalName(groupByKeys.size() - 1);
                    outputColumnNames.add(field);
                    groupByOutputRowResolver.putExpression(parameter, new ColumnInfo(field, distExprNode.getTypeInfo(), "", false));
                    colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
                }
            }
        }
    }
    // For each aggregation
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    assert (aggregationTrees != null);
    boolean containsDistinctAggr = false;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        ASTNode value = entry.getValue();
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        // 0 is the function name
        for (int i = 1; i < value.getChildCount(); i++) {
            ASTNode paraExpr = (ASTNode) value.getChild(i);
            ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver);
            aggParameters.add(paraExprNode);
        }
        boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
        containsDistinctAggr = containsDistinctAggr || isDistinct;
        boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
        Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value, isDistinct, isAllColumns);
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        if (groupByOutputRowResolver.getExpression(value) == null) {
            groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
        }
        // Save the evaluator, so that it can be used by the next-stage GroupByOperators.
        if (genericUDAFEvaluators != null) {
            genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
        }
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
            new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false,
                    groupByMemoryUsage, memoryThreshold, groupingSetKeys, groupingSetsPresent,
                    groupingSetsPosition, containsDistinctAggr),
            new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo),
            groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
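The duplicate-key check in this example, ExprNodeDescUtils.indexOf over the accumulated groupByKeys, can be exercised on its own. Below is a minimal standalone sketch, not taken from the Hive code base; the column name, table alias, and string type are illustrative assumptions:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DuplicateGroupByKeyCheck {
    public static void main(String[] args) {
        List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
        // Two expressions resolving to the same column, as happens when a
        // column alias duplicates a grouping key (names are made up).
        groupByKeys.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", "t", false));
        ExprNodeDesc candidate = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", "t", false);
        // indexOf returns the position of an equivalent expression, or -1;
        // a non-negative result is what makes the analyzer skip the key above.
        System.out.println(ExprNodeDescUtils.indexOf(candidate, groupByKeys)); // prints 0
    }
}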

Example 57 with ExprNodeColumnDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

From the class SemanticAnalyzer, method genGroupByPlanReduceSinkOperator.

/**
 * Generate the ReduceSinkOperator for the Group By Query Block
 * (qb.getParseInfo().getXXX(dest)). The new ReduceSinkOperator will be a child
 * of inputOperatorInfo.
 *
 * It will put all Group By keys and the distinct field (if any) in the
 * map-reduce sort key, and all other fields in the map-reduce value.
 *
 * @param numPartitionFields
 *          the number of fields for map-reduce partitioning. This is usually
 *          the number of fields in the Group By keys.
 * @return the new ReduceSinkOperator.
 * @throws SemanticException
 */
@SuppressWarnings("nls")
private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb, String dest, Operator inputOperatorInfo,
        List<ASTNode> grpByExprs, int numPartitionFields, boolean changeNumPartitionFields,
        int numReducers, boolean mapAggrDone, boolean groupingSetsPresent) throws SemanticException {
    RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    QBParseInfo parseInfo = qb.getParseInfo();
    RowResolver reduceSinkOutputRowResolver = new RowResolver();
    reduceSinkOutputRowResolver.setIsExprResolver(true);
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    // Pre-compute group-by keys and store in reduceKeys
    List<String> outputKeyColumnNames = new ArrayList<String>();
    List<String> outputValueColumnNames = new ArrayList<String>();
    ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForReduceSink(grpByExprs, dest, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, colExprMap);
    int keyLength = reduceKeys.size();
    int numOfColsRmedFromkey = grpByExprs.size() - keyLength;
    // add a key for reduce sink
    if (groupingSetsPresent) {
        // Process grouping set for the reduce sink operator
        processGroupingSetReduceSinkOperator(reduceSinkInputRowResolver, reduceSinkOutputRowResolver, reduceKeys, outputKeyColumnNames, colExprMap);
        if (changeNumPartitionFields) {
            numPartitionFields++;
        }
    }
    List<List<Integer>> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest, reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, colExprMap);
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    if (!mapAggrDone) {
        getReduceValuesForReduceSinkNoMapAgg(parseInfo, dest, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap);
    } else {
        // Put partial aggregation results in reduceValues
        int inputField = reduceKeys.size() + numOfColsRmedFromkey;
        for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
            TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(inputField).getType();
            ExprNodeColumnDesc exprDesc = new ExprNodeColumnDesc(type, getColumnInternalName(inputField), "", false);
            reduceValues.add(exprDesc);
            inputField++;
            String outputColName = getColumnInternalName(reduceValues.size() - 1);
            outputValueColumnNames.add(outputColName);
            String internalName = Utilities.ReduceField.VALUE.toString() + "." + outputColName;
            reduceSinkOutputRowResolver.putExpression(entry.getValue(), new ColumnInfo(internalName, type, null, false));
            colExprMap.put(internalName, exprDesc);
        }
    }
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(
            PlanUtils.getReduceSinkDesc(reduceKeys, groupingSetsPresent ? keyLength + 1 : keyLength,
                    reduceValues, distinctColIndices, outputKeyColumnNames, outputValueColumnNames,
                    true, -1, numPartitionFields, numReducers, AcidUtils.Operation.NOT_ACID),
            new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo),
            reduceSinkOutputRowResolver);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
}
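The map-aggregation branch above follows a fixed naming convention: partial aggregates are read by their internal "_colN" names and re-exposed to the reducer under the VALUE. prefix. A minimal standalone sketch of that convention (the bigint type and field positions are made-up values, not from the snippet):

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReduceValueNaming {
    public static void main(String[] args) {
        int inputField = 1; // position of the partial aggregate in the input row (illustrative)
        // Reference the partial aggregation result by its internal column name.
        ExprNodeColumnDesc exprDesc = new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo,
                BaseSemanticAnalyzer.getColumnInternalName(inputField), "", false);
        String outputColName = BaseSemanticAnalyzer.getColumnInternalName(0);
        // Downstream operators address the value through "VALUE.<internal name>".
        String internalName = Utilities.ReduceField.VALUE.toString() + "." + outputColName;
        ColumnInfo colInfo = new ColumnInfo(internalName, TypeInfoFactory.longTypeInfo, null, false);
        System.out.println(colInfo.getInternalName()); // prints VALUE._col0
    }
}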

Example 58 with ExprNodeColumnDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

From the class SemanticAnalyzer, method genGroupByPlanReduceSinkOperator2MR.

/**
 * Generate the second ReduceSinkOperator for the Group By Plan
 * (parseInfo.getXXX(dest)). The new ReduceSinkOperator will be a child of
 * groupByOperatorInfo.
 *
 * The second ReduceSinkOperator will put the group by keys in the map-reduce
 * sort key, and put the partial aggregation results in the map-reduce value.
 *
 * @param numPartitionFields
 *          the number of fields in the map-reduce partition key. This should
 *          always be the same as the number of Group By keys. We should be
 *          able to remove this parameter since in this phase there is no
 *          distinct any more.
 * @return the new ReduceSinkOperator.
 * @throws SemanticException
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanReduceSinkOperator2MR(QBParseInfo parseInfo, String dest,
        Operator groupByOperatorInfo, int numPartitionFields, int numReducers,
        boolean groupingSetsPresent) throws SemanticException {
    RowResolver reduceSinkInputRowResolver2 = opParseCtx.get(groupByOperatorInfo).getRowResolver();
    RowResolver reduceSinkOutputRowResolver2 = new RowResolver();
    reduceSinkOutputRowResolver2.setIsExprResolver(true);
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    // Get group-by keys and store in reduceKeys
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression(grpbyExpr).getType();
        ExprNodeColumnDesc inputExpr = new ExprNodeColumnDesc(typeInfo, field, "", false);
        reduceKeys.add(inputExpr);
        ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + field, typeInfo, "", false);
        reduceSinkOutputRowResolver2.putExpression(grpbyExpr, colInfo);
        colExprMap.put(colInfo.getInternalName(), inputExpr);
    }
    // add a key for reduce sink
    if (groupingSetsPresent) {
        // Note that the partitioning fields don't need to change, since the data is either
        // partitioned randomly, or by all grouping keys + distinct keys.
        processGroupingSetReduceSinkOperator(reduceSinkInputRowResolver2, reduceSinkOutputRowResolver2, reduceKeys, outputColumnNames, colExprMap);
    }
    // Get partial aggregation results and store in reduceValues
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    int inputField = reduceKeys.size();
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        String field = getColumnInternalName(inputField);
        ASTNode t = entry.getValue();
        TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression(t).getType();
        ExprNodeColumnDesc exprDesc = new ExprNodeColumnDesc(typeInfo, field, "", false);
        reduceValues.add(exprDesc);
        inputField++;
        String col = getColumnInternalName(reduceValues.size() - 1);
        outputColumnNames.add(col);
        reduceSinkOutputRowResolver2.putExpression(t, new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + col, typeInfo, "", false));
        colExprMap.put(col, exprDesc);
    }
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(
            PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1,
                    numPartitionFields, numReducers, AcidUtils.Operation.NOT_ACID),
            new RowSchema(reduceSinkOutputRowResolver2.getColumnInfos()), groupByOperatorInfo),
            reduceSinkOutputRowResolver2);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
}
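The second-stage sink applies the mirror-image convention for keys: first-stage group-by outputs are read back by their internal "_colN" names and re-published under the KEY. prefix, with colExprMap recording the internal-name-to-expression mapping. A minimal standalone sketch (string type and position 0 are illustrative assumptions):

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReduceKeyNaming {
    public static void main(String[] args) {
        String field = BaseSemanticAnalyzer.getColumnInternalName(0); // "_col0"
        // The reduce key references the first-stage group-by output column...
        ExprNodeColumnDesc inputExpr =
                new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, field, "", false);
        // ...and is re-published to the reducer side as "KEY._col0".
        ColumnInfo colInfo = new ColumnInfo(
                Utilities.ReduceField.KEY.toString() + "." + field,
                TypeInfoFactory.stringTypeInfo, "", false);
        System.out.println(colInfo.getInternalName() + " <- " + inputExpr.getColumn());
    }
}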

Example 59 with ExprNodeColumnDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

From the class CalcitePlanner, method handleInsertStatement.

// This function serves as a wrapper around handleInsertStatementSpec in SemanticAnalyzer.
Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb) throws SemanticException {
    ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
    for (int i = 0; i < columns.size(); i++) {
        ColumnInfo col = columns.get(i);
        colList.add(new ExprNodeColumnDesc(col));
    }
    ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
    RowResolver out_rwsch = handleInsertStatementSpec(colList, dest, inputRR, inputRR, qb, selExprList);
    ArrayList<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < colList.size(); i++) {
        String outputCol = getColumnInternalName(i);
        colExprMap.put(outputCol, colList.get(i));
        columnNames.add(outputCol);
    }
    Operator<?> output = putOpInsertMap(OperatorFactory.getAndMakeChild(
            new SelectDesc(colList, columnNames),
            new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
    output.setColumnExprMap(colExprMap);
    return output;
}
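The loop above builds an identity projection: every input ColumnInfo is wrapped into an ExprNodeColumnDesc through the single-argument constructor, which copies the column's internal name, type, and table alias. A minimal standalone sketch with made-up column names and types:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class IdentityProjection {
    public static void main(String[] args) {
        List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
        columns.add(new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "t", false));
        columns.add(new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "t", false));
        List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
        for (ColumnInfo col : columns) {
            // The ColumnInfo-based constructor copies internal name, type, and table alias.
            colList.add(new ExprNodeColumnDesc(col));
        }
        System.out.println(colList.size()); // 2
    }
}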

Example 60 with ExprNodeColumnDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

From the class DDLSemanticAnalyzer, method getFullPartitionSpecs.

/**
 * Get the partition specs from the tree. This stores the full specification
 * with the comparator operator into the output list.
 *
 * @param ast Tree to extract partitions from.
 * @param tab Table.
 * @param canGroupExprs Whether the expressions for specs with the same prefix length
 *                      may be OR-ed together into a single filter.
 * @return    Map of partitions by prefix length. Most of the time prefix length will
 *            be the same for all partition specs, so we can just OR the expressions.
 */
private Map<Integer, List<ExprNodeGenericFuncDesc>> getFullPartitionSpecs(CommonTree ast, Table tab,
        boolean canGroupExprs) throws SemanticException {
    String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    Map<String, String> colTypes = new HashMap<String, String>();
    for (FieldSchema fs : tab.getPartitionKeys()) {
        colTypes.put(fs.getName().toLowerCase(), fs.getType());
    }
    Map<Integer, List<ExprNodeGenericFuncDesc>> result = new HashMap<Integer, List<ExprNodeGenericFuncDesc>>();
    for (int childIndex = 0; childIndex < ast.getChildCount(); childIndex++) {
        Tree partSpecTree = ast.getChild(childIndex);
        if (partSpecTree.getType() != HiveParser.TOK_PARTSPEC) {
            continue;
        }
        ExprNodeGenericFuncDesc expr = null;
        HashSet<String> names = new HashSet<String>(partSpecTree.getChildCount());
        for (int i = 0; i < partSpecTree.getChildCount(); ++i) {
            CommonTree partSpecSingleKey = (CommonTree) partSpecTree.getChild(i);
            assert (partSpecSingleKey.getType() == HiveParser.TOK_PARTVAL);
            String key = stripIdentifierQuotes(partSpecSingleKey.getChild(0).getText()).toLowerCase();
            String operator = partSpecSingleKey.getChild(1).getText();
            ASTNode partValNode = (ASTNode) partSpecSingleKey.getChild(2);
            TypeCheckCtx typeCheckCtx = new TypeCheckCtx(null);
            ExprNodeConstantDesc valExpr = (ExprNodeConstantDesc) TypeCheckProcFactory.genExprNode(partValNode, typeCheckCtx).get(partValNode);
            Object val = valExpr.getValue();
            boolean isDefaultPartitionName = val.equals(defaultPartitionName);
            String type = colTypes.get(key);
            // Check that the column exists before resolving its type info, to avoid an NPE.
            if (type == null) {
                throw new SemanticException("Column " + key + " not found");
            }
            PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(type);
            // Create the corresponding hive expression to filter on partition columns.
            if (!isDefaultPartitionName) {
                if (!valExpr.getTypeString().equals(type)) {
                    Converter converter = ObjectInspectorConverters.getConverter(
                            TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(valExpr.getTypeInfo()),
                            TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(pti));
                    val = converter.convert(valExpr.getValue());
                }
            }
            ExprNodeColumnDesc column = new ExprNodeColumnDesc(pti, key, null, true);
            ExprNodeGenericFuncDesc op;
            if (!isDefaultPartitionName) {
                op = makeBinaryPredicate(operator, column, new ExprNodeConstantDesc(pti, val));
            } else {
                GenericUDF originalOp = FunctionRegistry.getFunctionInfo(operator).getGenericUDF();
                String fnName;
                if (FunctionRegistry.isEq(originalOp)) {
                    fnName = "isnull";
                } else if (FunctionRegistry.isNeq(originalOp)) {
                    fnName = "isnotnull";
                } else {
                    throw new SemanticException("Cannot use " + operator + " in a default partition spec; only '=' and '!=' are allowed.");
                }
                op = makeUnaryPredicate(fnName, column);
            }
            // If it's multi-expr filter (e.g. a='5', b='2012-01-02'), AND with previous exprs.
            expr = (expr == null) ? op : makeBinaryPredicate("and", expr, op);
            names.add(key);
        }
        if (expr == null) {
            continue;
        }
        // We got the expr for one full partition spec. Determine the prefix length.
        int prefixLength = calculatePartPrefix(tab, names);
        List<ExprNodeGenericFuncDesc> orExpr = result.get(prefixLength);
        // If we already have an expression for this prefix length, OR the new expression
        // into it (when grouping is allowed); if we don't, create a new separate filter.
        // In most cases there will only be one.
        if (orExpr == null) {
            result.put(prefixLength, Lists.newArrayList(expr));
        } else if (canGroupExprs) {
            orExpr.set(0, makeBinaryPredicate("or", expr, orExpr.get(0)));
        } else {
            orExpr.add(expr);
        }
    }
    return result;
}
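makeBinaryPredicate and makeUnaryPredicate are helpers of the analyzer itself; outside it, an equivalent column-versus-constant partition predicate can be assembled directly with ExprNodeGenericFuncDesc.newInstance. A minimal sketch, assuming the built-in "=" function resolves through FunctionRegistry in a standalone JVM (the partition column ds and the literal value are made up):

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartitionPredicateSketch {
    public static void main(String[] args) throws Exception {
        // Partition column, marked as such via the final constructor flag.
        ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "ds", null, true);
        ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "2012-01-02");
        // Equivalent of makeBinaryPredicate("=", column, constant) in the snippet above.
        ExprNodeGenericFuncDesc eq = ExprNodeGenericFuncDesc.newInstance(
                FunctionRegistry.getFunctionInfo("=").getGenericUDF(),
                Arrays.asList(column, constant));
        System.out.println(eq.getExprString()); // e.g. (ds = '2012-01-02')
    }
}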
