Example 31 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method setupStats.

private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, RowResolver rwsch) throws SemanticException {
    // if it is not an analyze command and not column stats, then do not gather stats
    if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) {
        tsDesc.setGatherStats(false);
    } else {
        if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
            String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString();
            LOG.debug("Set stats collection dir : " + statsTmpLoc);
            tsDesc.setTmpStatsDir(statsTmpLoc);
        }
        tsDesc.setGatherStats(true);
        tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        // append additional virtual columns for storing statistics
        Iterator<VirtualColumn> vcs = VirtualColumn.getStatsRegistry(conf).iterator();
        List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
        while (vcs.hasNext()) {
            VirtualColumn vc = vcs.next();
            rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
            vcList.add(vc);
        }
        tsDesc.addVirtualCols(vcList);
        String tblName = tab.getTableName();
        // Theoretically the key prefix could be any unique string shared
        // between TableScanOperator (when publishing) and StatsTask (when aggregating).
        // Here we use
        // db_name.table_name + partitionSpec
        // as the prefix for ease of reading during explain and debugging.
        // Currently, partition spec can only be static partition.
        String k = org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
        tsDesc.setStatsAggPrefix(tab.getDbName() + "." + k);
        // set up WriteEntity for replication
        outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
        // add WriteEntity for each matching partition
        if (tab.isPartitioned()) {
            List<String> cols = new ArrayList<String>();
            if (qbp.getAnalyzeRewrite() != null) {
                List<FieldSchema> partitionCols = tab.getPartCols();
                for (FieldSchema fs : partitionCols) {
                    cols.add(fs.getName());
                }
                tsDesc.setPartColumns(cols);
                return;
            }
            TableSpec tblSpec = qbp.getTableSpec(alias);
            Map<String, String> partSpec = tblSpec.getPartSpec();
            if (partSpec != null) {
                cols.addAll(partSpec.keySet());
                tsDesc.setPartColumns(cols);
            } else {
                throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
            }
            List<Partition> partitions = qbp.getTableSpec().partitions;
            if (partitions != null) {
                for (Partition partn : partitions) {
                    // inputs.add(new ReadEntity(partn)); // is this needed at all?
                    LOG.info("XXX: adding part: " + partn);
                    outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
                }
            }
        }
    }
}
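To make the virtual-column registration above concrete, here is a minimal, hedged sketch of putting a hidden virtual column into a RowResolver as a ColumnInfo. It assumes hive-exec on the classpath; the alias "t1" and the choice of VirtualColumn.FILENAME are illustrative stand-ins, since setupStats itself iterates the stats registry rather than picking a single column.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.RowResolver;

public class VirtualColumnInfoSketch {
    public static void main(String[] args) throws Exception {
        RowResolver rwsch = new RowResolver();
        // Any built-in virtual column illustrates the pattern; setupStats itself
        // iterates VirtualColumn.getStatsRegistry(conf) instead.
        VirtualColumn vc = VirtualColumn.FILENAME;
        // Mirrors the loop body above: register the virtual column under the
        // table alias, flagged as virtual (and hidden if the VC says so).
        rwsch.put("t1", vc.getName(),
                new ColumnInfo(vc.getName(), vc.getTypeInfo(), "t1", true, vc.getIsHidden()));
        ColumnInfo ci = rwsch.get("t1", vc.getName());
        System.out.println(ci.getInternalName() + " : " + ci.getType()
                + " virtual=" + ci.getIsVirtualCol());
    }
}

The virtual/hidden flags on the ColumnInfo are what keep such bookkeeping columns out of ordinary column expansion while still letting the stats machinery resolve them by name.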

Example 32 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genGroupByPlanGroupByOperator.

/**
 * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
 * The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
 *
 * @param mode
 *          The mode of the aggregation (PARTIAL1 or COMPLETE)
 * @param genericUDAFEvaluators
 *          If not null, this function will store the mapping from Aggregation
 *          StringTree to the genericUDAFEvaluator in this parameter, so it
 *          can be used in the next-stage GroupBy aggregations.
 * @return the new GroupByOperator
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo, String dest, Operator input, ReduceSinkOperator rs, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators) throws SemanticException {
    RowResolver groupByInputRowResolver = opParseCtx.get(input).getRowResolver();
    RowResolver groupByOutputRowResolver = new RowResolver();
    groupByOutputRowResolver.setIsExprResolver(true);
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
        if (exprInfo == null) {
            throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
        }
        groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo.getInternalName(), "", false));
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false);
        groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
        addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver);
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    // For each aggregation
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    assert (aggregationTrees != null);
    // get the last colName for the reduce KEY
    // it represents the column name corresponding to distinct aggr, if any
    String lastKeyColName = null;
    List<String> inputKeyCols = rs.getConf().getOutputKeyColumnNames();
    if (inputKeyCols.size() > 0) {
        lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
    }
    List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
    int numDistinctUDFs = 0;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        ASTNode value = entry.getValue();
        // This is the GenericUDAF name
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
        boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
        // Convert children to aggParameters
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        // 0 is the function name
        for (int i = 1; i < value.getChildCount(); i++) {
            ASTNode paraExpr = (ASTNode) value.getChild(i);
            ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(paraExpr);
            if (paraExprInfo == null) {
                throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
            }
            String paraExpression = paraExprInfo.getInternalName();
            assert (paraExpression != null);
            if (isDistinct && lastKeyColName != null) {
                // if aggr is distinct, the parameter name is constructed as
                // KEY.lastKeyColName:<tag>._colx
                paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName + ":" + numDistinctUDFs + "." + getColumnInternalName(i - 1);
            }
            ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol());
            ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(paraExprInfo.getInternalName(), reduceValues);
            if (reduceValue != null) {
                // this parameter is a constant
                expr = reduceValue;
            }
            aggParameters.add(expr);
        }
        if (isDistinct) {
            numDistinctUDFs++;
        }
        Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value, isDistinct, isAllColumns);
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
        // GroupByOperators
        if (genericUDAFEvaluators != null) {
            genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
        }
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, -1, numDistinctUDFs > 0), new RowSchema(groupByOutputRowResolver.getColumnInfos()), input), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
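The trickiest naming step above is the distinct-aggregate branch: when the aggregation is distinct, its parameter is re-addressed through the reduce KEY rather than through its original column. A small sketch of just that string construction follows; the concrete names and indices are invented for illustration.

import org.apache.hadoop.hive.ql.exec.Utilities;

public class DistinctParamNameSketch {
    public static void main(String[] args) {
        String lastKeyColName = "_col1"; // hypothetical: last output key column of the ReduceSink
        int numDistinctUDFs = 0;         // tag of the current distinct aggregation
        int i = 1;                       // AST child index; child 0 is the function name
        // getColumnInternalName(i - 1) in the analyzer resolves to "_col" + (i - 1)
        String paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName
                + ":" + numDistinctUDFs + "." + "_col" + (i - 1);
        System.out.println(paraExpression); // prints KEY._col1:0._col0
    }
}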

Example 33 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genGroupByPlanMapGroupByOperator.

/**
 * Generate the map-side GroupByOperator for the Query Block
 * (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
 * of the inputOperatorInfo.
 *
 * @param mode
 *          The mode of the aggregation (HASH)
 * @param genericUDAFEvaluators
 *          If not null, this function will store the mapping from Aggregation
 *          StringTree to the genericUDAFEvaluator in this parameter, so it
 *          can be used in the next-stage GroupBy aggregations.
 * @return the new GroupByOperator
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, List<ASTNode> grpByExprs, Operator inputOperatorInfo, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, List<Long> groupingSetKeys, boolean groupingSetsPresent) throws SemanticException {
    RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    QBParseInfo parseInfo = qb.getParseInfo();
    RowResolver groupByOutputRowResolver = new RowResolver();
    groupByOutputRowResolver.setIsExprResolver(true);
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, groupByInputRowResolver);
        if ((grpByExprNode instanceof ExprNodeColumnDesc) && ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
            // Skip duplicated grouping keys; this happens when a column alias is defined.
            grpByExprs.remove(i--);
            continue;
        }
        groupByKeys.add(grpByExprNode);
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        groupByOutputRowResolver.putExpression(grpbyExpr, new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    // The grouping set key is present after the grouping keys, before the distinct keys
    int groupingSetsPosition = -1;
    // If grouping sets are present, create an additional grouping key
    // for the grouping set (corresponding to the rollup).
    if (groupingSetsPresent) {
        groupingSetsPosition = groupByKeys.size();
        createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
    }
    // If there is a distinctFuncExp, add all parameters to the reduceKeys.
    if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
        List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest);
        for (ASTNode value : list) {
            // 0 is function name
            for (int i = 1; i < value.getChildCount(); i++) {
                ASTNode parameter = (ASTNode) value.getChild(i);
                if (groupByOutputRowResolver.getExpression(parameter) == null) {
                    ExprNodeDesc distExprNode = genExprNodeDesc(parameter, groupByInputRowResolver);
                    groupByKeys.add(distExprNode);
                    String field = getColumnInternalName(groupByKeys.size() - 1);
                    outputColumnNames.add(field);
                    groupByOutputRowResolver.putExpression(parameter, new ColumnInfo(field, distExprNode.getTypeInfo(), "", false));
                    colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
                }
            }
        }
    }
    // For each aggregation
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    assert (aggregationTrees != null);
    boolean containsDistinctAggr = false;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        ASTNode value = entry.getValue();
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        // 0 is the function name
        for (int i = 1; i < value.getChildCount(); i++) {
            ASTNode paraExpr = (ASTNode) value.getChild(i);
            ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver);
            aggParameters.add(paraExprNode);
        }
        boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
        containsDistinctAggr = containsDistinctAggr || isDistinct;
        boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
        Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value, isDistinct, isAllColumns);
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        if (groupByOutputRowResolver.getExpression(value) == null) {
            groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
        }
        // GroupByOperators
        if (genericUDAFEvaluators != null) {
            genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
        }
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, groupingSetKeys, groupingSetsPresent, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
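One detail worth pulling out of the method above is the output-column numbering: grouping keys come first, the synthetic grouping-set key (when present) sits immediately after them at groupingSetsPosition, and aggregation columns follow. A plain-Java sketch of that layout, with made-up sizes; the names mirror getColumnInternalName's "_col" + position convention.

import java.util.ArrayList;
import java.util.List;

public class GroupByColumnNumberingSketch {
    public static void main(String[] args) {
        // Invented sizes: two grouping keys, a grouping set, one aggregation.
        int numGroupKeys = 2;
        boolean groupingSetsPresent = true;
        int numAggregations = 1;
        List<String> outputColumnNames = new ArrayList<>();
        int pos = 0;
        for (int i = 0; i < numGroupKeys; i++) {
            outputColumnNames.add("_col" + pos++); // one output column per grouping key
        }
        int groupingSetsPosition = -1;
        if (groupingSetsPresent) {
            groupingSetsPosition = pos;            // the key sits right after the grouping keys
            outputColumnNames.add("_col" + pos++);
        }
        for (int i = 0; i < numAggregations; i++) {
            outputColumnNames.add("_col" + pos++); // aggregation columns come last
        }
        System.out.println(outputColumnNames + " groupingSetsPosition=" + groupingSetsPosition);
        // prints [_col0, _col1, _col2, _col3] groupingSetsPosition=2
    }
}

This is why the analyzer computes field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1) for each aggregation: the key columns and the grouping-set key have already consumed the earlier positions.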

Example 34 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method getNotNullConstraintExpr.

private ExprNodeDesc getNotNullConstraintExpr(Table targetTable, Operator input, String dest) throws SemanticException {
    boolean forceNotNullConstraint = conf.getBoolVar(ConfVars.HIVE_ENFORCE_NOT_NULL_CONSTRAINT);
    if (!forceNotNullConstraint) {
        return null;
    }
    ImmutableBitSet nullConstraintBitSet = null;
    try {
        nullConstraintBitSet = getEnabledNotNullConstraints(targetTable);
    } catch (Exception e) {
        if (e instanceof SemanticException) {
            throw (SemanticException) e;
        } else {
            throw (new RuntimeException(e));
        }
    }
    if (nullConstraintBitSet == null) {
        return null;
    }
    List<ColumnInfo> colInfos = input.getSchema().getSignature();
    ExprNodeDesc currUDF = null;
    // Add NOT NULL constraints
    int constraintIdx = 0;
    for (int colExprIdx = 0; colExprIdx < colInfos.size(); colExprIdx++) {
        if (updating(dest) && colExprIdx == 0) {
            // for updates first column is _rowid
            continue;
        }
        if (nullConstraintBitSet.indexOf(constraintIdx) != -1) {
            ExprNodeDesc currExpr = TypeCheckProcFactory.toExprNodeDesc(colInfos.get(colExprIdx));
            ExprNodeDesc isNotNullUDF = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("isnotnull", currExpr);
            if (currUDF != null) {
                currUDF = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("and", currUDF, isNotNullUDF);
            } else {
                currUDF = isNotNullUDF;
            }
        }
        constraintIdx++;
    }
    return currUDF;
}
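The loop above folds one isnotnull check per enforced column into a single conjunction. Below is a hedged, self-contained sketch of the same folding, assuming hive-exec on the classpath; the column names, types, and the alias "t1" are hypothetical.

import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class NotNullPredicateSketch {
    public static void main(String[] args) throws Exception {
        ExprNodeDesc currUDF = null;
        // Hypothetical NOT NULL columns of a table aliased "t1", all strings here.
        String[] notNullCols = { "id", "name" };
        for (String col : notNullCols) {
            ExprNodeDesc currExpr =
                    new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, col, "t1", false);
            ExprNodeDesc isNotNullUDF =
                    TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("isnotnull", currExpr);
            // Fold the checks together with "and", exactly as the loop above does.
            currUDF = (currUDF == null) ? isNotNullUDF
                    : TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("and", currUDF, isNotNullUDF);
        }
        System.out.println(currUDF.getExprString()); // the combined predicate over both columns
    }
}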

Example 35 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genGroupByPlanReduceSinkOperator.

/**
 * Generate the ReduceSinkOperator for the Group By Query Block
 * (qb.getParseInfo().getXXX(dest)). The new ReduceSinkOperator will be a child
 * of inputOperatorInfo.
 *
 * It will put all Group By keys and the distinct field (if any) in the
 * map-reduce sort key, and all other fields in the map-reduce value.
 *
 * @param numPartitionFields
 *          the number of fields for map-reduce partitioning. This is usually
 *          the number of fields in the Group By keys.
 * @return the new ReduceSinkOperator.
 * @throws SemanticException
 */
@SuppressWarnings("nls")
private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb, String dest, Operator inputOperatorInfo, List<ASTNode> grpByExprs, int numPartitionFields, boolean changeNumPartitionFields, int numReducers, boolean mapAggrDone, boolean groupingSetsPresent) throws SemanticException {
    RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    QBParseInfo parseInfo = qb.getParseInfo();
    RowResolver reduceSinkOutputRowResolver = new RowResolver();
    reduceSinkOutputRowResolver.setIsExprResolver(true);
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    // Pre-compute group-by keys and store in reduceKeys
    List<String> outputKeyColumnNames = new ArrayList<String>();
    List<String> outputValueColumnNames = new ArrayList<String>();
    ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForReduceSink(grpByExprs, dest, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, colExprMap);
    int keyLength = reduceKeys.size();
    int numOfColsRmedFromkey = grpByExprs.size() - keyLength;
    // add a key for reduce sink
    if (groupingSetsPresent) {
        // Process grouping set for the reduce sink operator
        processGroupingSetReduceSinkOperator(reduceSinkInputRowResolver, reduceSinkOutputRowResolver, reduceKeys, outputKeyColumnNames, colExprMap);
        if (changeNumPartitionFields) {
            numPartitionFields++;
        }
    }
    List<List<Integer>> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest, reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, colExprMap);
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    if (!mapAggrDone) {
        getReduceValuesForReduceSinkNoMapAgg(parseInfo, dest, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap);
    } else {
        // Put partial aggregation results in reduceValues
        int inputField = reduceKeys.size() + numOfColsRmedFromkey;
        for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
            TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(inputField).getType();
            ExprNodeColumnDesc exprDesc = new ExprNodeColumnDesc(type, getColumnInternalName(inputField), "", false);
            reduceValues.add(exprDesc);
            inputField++;
            String outputColName = getColumnInternalName(reduceValues.size() - 1);
            outputValueColumnNames.add(outputColName);
            String internalName = Utilities.ReduceField.VALUE.toString() + "." + outputColName;
            reduceSinkOutputRowResolver.putExpression(entry.getValue(), new ColumnInfo(internalName, type, null, false));
            colExprMap.put(internalName, exprDesc);
        }
    }
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, groupingSetsPresent ? keyLength + 1 : keyLength, reduceValues, distinctColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionFields, numReducers, AcidUtils.Operation.NOT_ACID), new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
}
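For the map-side-aggregated branch above, each partial aggregate travels in the reduce VALUE, and its ColumnInfo is registered under a "VALUE.<col>" internal name. A tiny sketch of that naming; the position is invented.

import org.apache.hadoop.hive.ql.exec.Utilities;

public class ReduceSinkValueNameSketch {
    public static void main(String[] args) {
        int reduceValuesSize = 1; // pretend one partial aggregate has been added
        // getColumnInternalName(reduceValues.size() - 1) resolves to "_col" + position
        String outputColName = "_col" + (reduceValuesSize - 1);
        String internalName = Utilities.ReduceField.VALUE.toString() + "." + outputColName;
        System.out.println(internalName); // prints VALUE._col0
    }
}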
