Example 76 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

In class SemanticAnalyzer, method genSamplePredicate:

/**
 * Generates the sampling predicate from the TABLESAMPLE clause information.
 * If useBucketCols is set to true, this function uses the bucket column
 * list as the expression inputs to the predicate hash function; otherwise
 * it uses the expression list stored in the TableSample. If the TABLESAMPLE
 * clause provides no expressions and the table has clustering columns
 * defined in its metadata, the bucket columns of the table are used to
 * generate this predicate. The predicate created has the
 * following structure:
 *
 * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator
 *
 * @param ts
 *          TABLESAMPLE clause information
 * @param bucketCols
 *          The clustering columns of the table
 * @param useBucketCols
 *          Flag to indicate whether the bucketCols should be used as input to
 *          the hash function
 * @param alias
 *          The alias used for the table in the row resolver
 * @param rwsch
 *          The row resolver used to resolve column references
 * @param planExpr
 *          The plan tree for the expression. If the user specified this, the
 *          parse expressions are not used
 * @return exprNodeDesc
 * @throws SemanticException
 */
private ExprNodeDesc genSamplePredicate(TableSample ts, List<String> bucketCols, boolean useBucketCols, String alias, RowResolver rwsch, ExprNodeDesc planExpr, int bucketingVersion) throws SemanticException {
    ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getNumerator() - 1));
    ExprNodeDesc denominatorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getDenominator()));
    ExprNodeDesc intMaxExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(Integer.MAX_VALUE));
    List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
    if (planExpr != null) {
        args.add(planExpr);
    } else if (useBucketCols) {
        for (String col : bucketCols) {
            ColumnInfo ci = rwsch.get(alias, col);
            // TODO: change type to the one in the table schema
            args.add(new ExprNodeColumnDesc(ci));
        }
    } else {
        for (ASTNode expr : ts.getExprs()) {
            args.add(genExprNodeDesc(expr, rwsch));
        }
    }
    ExprNodeDesc equalsExpr = null;
    {
        ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, bucketingVersion == 2 ? new GenericUDFMurmurHash() : new GenericUDFHash(), args);
        LOG.info("hashfnExpr = " + hashfnExpr);
        ExprNodeDesc andExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr);
        LOG.info("andExpr = " + andExpr);
        ExprNodeDesc modExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("%", andExpr, denominatorExpr);
        LOG.info("modExpr = " + modExpr);
        LOG.info("numeratorExpr = " + numeratorExpr);
        equalsExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("==", modExpr, numeratorExpr);
        LOG.info("equalsExpr = " + equalsExpr);
    }
    return equalsExpr;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFMurmurHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
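
To make the generated predicate concrete, here is a minimal, self-contained sketch of the arithmetic it encodes. This is illustrative plain Java, not the Hive API: the hash function stands in for GenericUDFHash (or GenericUDFMurmurHash when bucketingVersion == 2), and the numerator is 0-based, matching the ts.getNumerator() - 1 above.

public class TableSampleSketch {
    // Stand-in for hash(expressions); Hive computes this with GenericUDFHash,
    // or GenericUDFMurmurHash for bucketing version 2.
    static int hash(Object... exprs) {
        return java.util.Arrays.hashCode(exprs);
    }

    // TABLESAMPLE(BUCKET n OUT OF d): a row survives the filter when
    // ((hash(expressions) & Integer.MAX_VALUE) % d) == n - 1.
    // The & Integer.MAX_VALUE clears the sign bit so the modulo is non-negative.
    static boolean sampled(int numerator, int denominator, Object... exprs) {
        return ((hash(exprs) & Integer.MAX_VALUE) % denominator) == numerator - 1;
    }

    public static void main(String[] args) {
        // Roughly one third of distinct keys should satisfy BUCKET 1 OUT OF 3.
        int kept = 0;
        for (int key = 0; key < 9000; key++) {
            if (sampled(1, 3, key)) {
                kept++;
            }
        }
        System.out.println("kept " + kept + " of 9000 keys");
    }
}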

Example 77 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

In class SemanticAnalyzer, method genGroupByPlanGroupByOperator2MR:

/**
 * Generates the second GroupByOperator for the group-by plan
 * (parseInfo.getXXX(dest)). The new GroupByOperator performs the second,
 * final aggregation based on the partial aggregation results.
 *
 * @param genericUDAFEvaluators
 *          The mapping from each aggregation's string tree to its
 *          GenericUDAFEvaluator.
 * @return the new GroupByOperator
 * @throws SemanticException
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo2, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, boolean groupingSetsPresent) throws SemanticException {
    RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRowResolver();
    RowResolver groupByOutputRowResolver2 = new RowResolver();
    groupByOutputRowResolver2.setIsExprResolver(true);
    List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    List<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    List<String> outputColumnNames = new ArrayList<String>();
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr);
        if (exprInfo == null) {
            throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), grpbyExpr));
        }
        String expression = exprInfo.getInternalName();
        groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression, exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
        groupByOutputRowResolver2.putExpression(grpbyExpr, oColInfo);
        addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    int groupingSetsPosition = -1;
    // For grouping sets, add a dummy grouping key
    if (groupingSetsPresent) {
        groupingSetsPosition = groupByKeys.size();
        addGroupingSetKey(groupByKeys, groupByInputRowResolver2, groupByOutputRowResolver2, outputColumnNames, colExprMap);
    }
    Map<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    boolean containsDistinctAggr = false;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        List<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        ASTNode value = entry.getValue();
        ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value);
        if (paraExprInfo == null) {
            throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), value));
        }
        String paraExpression = paraExprInfo.getInternalName();
        assert (paraExpression != null);
        aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
        containsDistinctAggr = containsDistinctAggr || isDistinct;
        Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, false, amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        groupByOutputRowResolver2.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    float minReductionHashAggr = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    float minReductionHashAggrLowerBound = HiveConf.getFloatVar(conf, ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, null, false, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver2.getColumnInfos()), reduceSinkOperatorInfo2), groupByOutputRowResolver2);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap)
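
The GroupByDesc.Mode.FINAL passed above is the key detail: this second operator only merges the partial aggregation results produced by the first group-by stage, so it never re-reads raw rows. A minimal sketch of that division of labor, in plain Java rather than the GenericUDAFEvaluator API:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class TwoStageCountSketch {
    // Stage 1 (the first group-by, map side): partial counts per key.
    static Map<String, Long> partialCount(List<String> rows) {
        Map<String, Long> partial = new HashMap<>();
        for (String key : rows) {
            partial.merge(key, 1L, Long::sum);
        }
        return partial;
    }

    // Stage 2 (the operator built above, Mode.FINAL): merge partial results
    // per key; the raw rows are never touched again.
    static Map<String, Long> mergeFinal(List<Map<String, Long>> partials) {
        Map<String, Long> merged = new HashMap<>();
        for (Map<String, Long> partial : partials) {
            partial.forEach((k, v) -> merged.merge(k, v, Long::sum));
        }
        return merged;
    }

    public static void main(String[] args) {
        Map<String, Long> p1 = partialCount(List.of("a", "b", "a"));
        Map<String, Long> p2 = partialCount(List.of("b", "b", "c"));
        System.out.println(mergeFinal(List.of(p1, p2))); // a=2, b=3, c=1
    }
}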

Example 78 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

In class SemanticAnalyzer, method handleInsertStatementSpec:

/**
 * This modifies the Select projections when the Select is part of an insert statement and
 * the insert statement specifies a column list for the target table, e.g.
 * create table source (a int, b int);
 * create table target (x int, y int, z int);
 * insert into target(z,x) select * from source
 *
 * Once the * is resolved to 'a,b', this list needs to be rewritten to 'b,null,a' so that it looks
 * as if the original query was written as
 * insert into target select b, null, a from source
 *
 * If the target schema is not specified, this is a no-op.
 *
 * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
 * @throws SemanticException
 */
RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, RowResolver outputRR, QB qb, ASTNode selExprList) throws SemanticException {
    // (z,x)
    // specified in the query
    List<String> targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);
    if (targetTableSchema == null) {
        // no insert schema was specified
        return outputRR;
    }
    if (targetTableSchema.size() != col_list.size()) {
        Table target = qb.getMetaData().getDestTableForAlias(dest);
        Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
        throw new SemanticException(generateErrorMessage(selExprList, "Expected " + targetTableSchema.size() + " columns for " + dest + (target != null ? "/" + target.getCompleteName() : (partition != null ? "/" + partition.getCompleteName() : "")) + "; select produces " + col_list.size() + " columns"));
    }
    // e.g. map z->expr for a
    Map<String, ExprNodeDesc> targetCol2Projection = new HashMap<String, ExprNodeDesc>();
    // e.g. map z->ColumnInfo for a
    Map<String, ColumnInfo> targetCol2ColumnInfo = new HashMap<String, ColumnInfo>();
    int colListPos = 0;
    for (String targetCol : targetTableSchema) {
        targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos));
        targetCol2Projection.put(targetCol, col_list.get(colListPos++));
    }
    Table target = qb.getMetaData().getDestTableForAlias(dest);
    Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
    if (target == null && partition == null) {
        throw new SemanticException(generateErrorMessage(selExprList, "No table/partition found in QB metadata for dest='" + dest + "'"));
    }
    List<ExprNodeDesc> newColList = new ArrayList<ExprNodeDesc>();
    colListPos = 0;
    List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols();
    List<String> targetTableColNames = new ArrayList<String>();
    List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>();
    for (FieldSchema fs : targetTableCols) {
        targetTableColNames.add(fs.getName());
        targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
    }
    Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest);
    if (partSpec != null) {
        // relies on consistent order via LinkedHashMap
        for (Map.Entry<String, String> partKeyVal : partSpec.entrySet()) {
            if (partKeyVal.getValue() == null) {
                // these must be after non-partition cols
                targetTableColNames.add(partKeyVal.getKey());
                targetTableColTypes.add(TypeInfoFactory.stringTypeInfo);
            }
        }
    }
    // now make the select produce <regular columns>,<dynamic partition columns>,
    // where missing columns are NULL-filled
    Table tbl = target == null ? partition.getTable() : target;
    RowResolver newOutputRR = getColForInsertStmtSpec(targetCol2Projection, tbl, targetCol2ColumnInfo, colListPos, targetTableColTypes, newColList, targetTableColNames);
    col_list.clear();
    col_list.addAll(newColList);
    return newOutputRR;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable) Table(org.apache.hadoop.hive.ql.metadata.Table) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
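
The reordering itself is straightforward once targetCol2Projection is built; a minimal sketch of the idea, using strings to stand in for ExprNodeDesc projections (names are hypothetical, and the real method also appends dynamic partition columns and delegates to getColForInsertStmtSpec):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class InsertSpecSketch {
    // Rewrites the select list so it lines up with the full target schema:
    // columns named in the insert spec keep their projection; every other
    // target column is NULL-filled.
    static List<String> reorder(List<String> insertSpec, List<String> projections,
                                List<String> targetSchema) {
        Map<String, String> targetCol2Projection = new HashMap<>();
        for (int i = 0; i < insertSpec.size(); i++) {
            targetCol2Projection.put(insertSpec.get(i), projections.get(i));
        }
        List<String> newColList = new ArrayList<>();
        for (String col : targetSchema) {
            newColList.add(targetCol2Projection.getOrDefault(col, "NULL"));
        }
        return newColList;
    }

    public static void main(String[] args) {
        // insert into target(z,x) select a,b from source; target is (x,y,z).
        System.out.println(reorder(List.of("z", "x"), List.of("a", "b"),
                List.of("x", "y", "z")));
        // prints [b, NULL, a], the 'b, null, a' rewrite from the javadoc.
    }
}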

Example 79 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

In class SubQueryUtils, method buildSQJoinExpr:

/*
   * construct the ASTNode for the SQ column that will join with the OuterQuery Expression.
   * So for 'select ... from R1 where A in (select B from R2...)'
   * this will build (. (TOK_TABLE_OR_COL Identifier[SQ_1]) Identifier[B])
   * where 'SQ_1' is the alias generated for the SubQuery.
   */
static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR) {
    List<ColumnInfo> signature = sqRR.getRowSchema().getSignature();
    ColumnInfo joinColumn = signature.get(0);
    String[] joinColName = sqRR.reverseLookup(joinColumn.getInternalName());
    return createColRefAST(sqAlias, joinColName[1]);
}
Also used : ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo)
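
As a usage note, the returned AST is the right-hand side of the join condition that the subquery rewrite introduces: 'select ... from R1 where A in (select B from R2 ...)' becomes a join of R1 with the subquery under alias SQ_1, on A = SQ_1.B. A toy rendering of the tree shape (plain Java, illustrative only; Hive builds real ASTNode trees via createColRefAST):

public class SQJoinExprSketch {
    // Toy rendering of the AST built for the subquery join column:
    // (. (TOK_TABLE_OR_COL <alias>) <column>), i.e. "alias.column".
    static String colRef(String sqAlias, String colName) {
        return "(. (TOK_TABLE_OR_COL " + sqAlias + ") " + colName + ")";
    }

    public static void main(String[] args) {
        // prints (. (TOK_TABLE_OR_COL SQ_1) B)
        System.out.println(colRef("SQ_1", "B"));
    }
}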

Example 80 with ColumnInfo

Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

In class OperatorHealthCheckerHook, method checkOperator:

public static void checkOperator(Operator<?> op) {
    OperatorDesc conf = op.getConf();
    Map<String, ExprNodeDesc> exprMap = conf.getColumnExprMap();
    RowSchema schema = op.getSchema();
    checkSchema(schema);
    if (op instanceof SelectOperator) {
        checkSelectOperator((SelectOperator) op);
    }
    if (schema != null && exprMap != null) {
        for (Entry<String, ExprNodeDesc> c : exprMap.entrySet()) {
            if (c.getValue() instanceof ExprNodeConstantDesc) {
                continue;
            }
            ColumnInfo ci = schema.getColumnInfo(c.getKey());
            if (c.getKey().startsWith(Utilities.ReduceField.KEY + ".reducesinkkey")) {
                continue;
            }
            if (ci == null && conf.getComputedFields().contains(c.getKey())) {
                continue;
            }
            if (ci == null) {
                throw new RuntimeException("schema not found for " + c + " in " + schema);
            }
        }
        for (ColumnInfo sig : schema.getSignature()) {
            if (op instanceof ScriptOperator) {
                continue;
            }
            String iName = sig.getInternalName();
            ExprNodeDesc e = exprMap.get(iName);
            if (isSemiJoinRS(op)) {
                continue;
            }
            if (op.getConf() instanceof GroupByDesc) {
                continue;
            }
            if (e == null) {
                throw new RuntimeException("expr not found for " + iName + " in " + exprMap);
            }
        }
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
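
Stripped of the operator-specific exemptions (reduce-sink keys, computed fields, constants, ScriptOperator, GroupByDesc, semijoin reduce sinks), the invariant the hook enforces is a two-way consistency between the row schema and the column expression map. A minimal standalone sketch, with hypothetical names in place of the Hive types:

import java.util.Map;
import java.util.Set;

public class SchemaConsistencySketch {
    // Every mapped column must exist in the schema, and every schema column
    // must have a defining expression in the map.
    static void check(Set<String> schemaColumns, Map<String, String> columnExprMap) {
        for (String mapped : columnExprMap.keySet()) {
            if (!schemaColumns.contains(mapped)) {
                throw new RuntimeException("schema not found for " + mapped);
            }
        }
        for (String col : schemaColumns) {
            if (!columnExprMap.containsKey(col)) {
                throw new RuntimeException("expr not found for " + col);
            }
        }
    }

    public static void main(String[] args) {
        // Consistent: no exception thrown.
        check(Set.of("_col0", "_col1"), Map.of("_col0", "key", "_col1", "value"));
    }
}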

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 225
ArrayList (java.util.ArrayList): 140
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 138
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 100
HashMap (java.util.HashMap): 93
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 86
LinkedHashMap (java.util.LinkedHashMap): 71
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 59
Operator (org.apache.hadoop.hive.ql.exec.Operator): 48
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 47
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 47
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 45
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 45
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 45
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 45
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 45
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 45
Map (java.util.Map): 41
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 39
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException): 38