
Example 36 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genGroupByPlanReduceSinkOperator2MR.

/**
 * Generate the second ReduceSinkOperator for the Group By Plan
 * (parseInfo.getXXX(dest)). The new ReduceSinkOperator will be a child of
 * groupByOperatorInfo.
 *
 * The second ReduceSinkOperator will put the group by keys in the map-reduce
 * sort key, and put the partial aggregation results in the map-reduce value.
 *
 * @param numPartitionFields
 *          the number of fields in the map-reduce partition key. This should
 *          always be the same as the number of Group By keys. We should be
 *          able to remove this parameter since in this phase there is no
 *          distinct any more.
 * @return the new ReduceSinkOperator.
 * @throws SemanticException
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanReduceSinkOperator2MR(QBParseInfo parseInfo, String dest, Operator groupByOperatorInfo, int numPartitionFields, int numReducers, boolean groupingSetsPresent) throws SemanticException {
    RowResolver reduceSinkInputRowResolver2 = opParseCtx.get(groupByOperatorInfo).getRowResolver();
    RowResolver reduceSinkOutputRowResolver2 = new RowResolver();
    reduceSinkOutputRowResolver2.setIsExprResolver(true);
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    // Get group-by keys and store in reduceKeys
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression(grpbyExpr).getType();
        ExprNodeColumnDesc inputExpr = new ExprNodeColumnDesc(typeInfo, field, "", false);
        reduceKeys.add(inputExpr);
        ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + field, typeInfo, "", false);
        reduceSinkOutputRowResolver2.putExpression(grpbyExpr, colInfo);
        colExprMap.put(colInfo.getInternalName(), inputExpr);
    }
    // add a key for reduce sink
    if (groupingSetsPresent) {
        // Note that the partitioning fields don't need to change, since the data is
        // either partitioned randomly or by all grouping keys + distinct keys
        processGroupingSetReduceSinkOperator(reduceSinkInputRowResolver2, reduceSinkOutputRowResolver2, reduceKeys, outputColumnNames, colExprMap);
    }
    // Get partial aggregation results and store in reduceValues
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    int inputField = reduceKeys.size();
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        String field = getColumnInternalName(inputField);
        ASTNode t = entry.getValue();
        TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression(t).getType();
        ExprNodeColumnDesc exprDesc = new ExprNodeColumnDesc(typeInfo, field, "", false);
        reduceValues.add(exprDesc);
        inputField++;
        String col = getColumnInternalName(reduceValues.size() - 1);
        outputColumnNames.add(col);
        reduceSinkOutputRowResolver2.putExpression(t, new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + col, typeInfo, "", false));
        colExprMap.put(col, exprDesc);
    }
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, numPartitionFields, numReducers, AcidUtils.Operation.NOT_ACID), new RowSchema(reduceSinkOutputRowResolver2.getColumnInfos()), groupByOperatorInfo), reduceSinkOutputRowResolver2);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap)
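
The essential convention in this method is that group-by keys travel in the map-reduce sort key while partial aggregation results travel in the value, and the ColumnInfo internal names carry the KEY./VALUE. prefix that keeps the two sides apart in the output row resolver. Below is a minimal, standalone sketch of that naming pattern using only the constructors seen above; the class name, the column positions, and the types are illustrative assumptions, not taken from SemanticAnalyzer itself.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReduceSinkNamingSketch {

    public static void main(String[] args) {
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();

        // Group-by key: becomes part of the map-reduce sort key, exposed as KEY._col0.
        String keyField = "_col0";
        ExprNodeDesc keyExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, keyField, "", false);
        ColumnInfo keyInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + keyField, TypeInfoFactory.stringTypeInfo, "", false);
        colExprMap.put(keyInfo.getInternalName(), keyExpr);

        // Partial aggregation (e.g. a partial count): carried in the value, exposed as VALUE._col0.
        String valueField = "_col0";
        ExprNodeDesc valueExpr = new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, valueField, "", false);
        ColumnInfo valueInfo = new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + valueField, TypeInfoFactory.longTypeInfo, "", false);
        colExprMap.put(valueInfo.getInternalName(), valueExpr);

        // Prints KEY._col0 and VALUE._col0; the prefix is what keeps key and value columns distinct.
        System.out.println(colExprMap.keySet());
    }
}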

Example 37 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class BaseSemanticAnalyzer method validateCheckConstraint.

public static void validateCheckConstraint(List<FieldSchema> cols, List<SQLCheckConstraint> checkConstraints, Configuration conf) throws SemanticException {
    // create colinfo and then row resolver
    RowResolver rr = new RowResolver();
    for (FieldSchema col : cols) {
        ColumnInfo ci = new ColumnInfo(col.getName(), TypeInfoUtils.getTypeInfoFromTypeString(col.getType()), null, false);
        rr.put(null, col.getName(), ci);
    }
    TypeCheckCtx typeCheckCtx = new TypeCheckCtx(rr);
    // TypeCheckProcFactory expects the TypeCheckCtx to have an unparse translator
    UnparseTranslator unparseTranslator = new UnparseTranslator(conf);
    typeCheckCtx.setUnparseTranslator(unparseTranslator);
    for (SQLCheckConstraint cc : checkConstraints) {
        try {
            ParseDriver parseDriver = new ParseDriver();
            ASTNode checkExprAST = parseDriver.parseExpression(cc.getCheck_expression());
            validateCheckExprAST(checkExprAST);
            Map<ASTNode, ExprNodeDesc> genExprs = TypeCheckProcFactory.genExprNode(checkExprAST, typeCheckCtx);
            ExprNodeDesc checkExpr = genExprs.get(checkExprAST);
            if (checkExpr == null) {
                throw new SemanticException(ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("Invalid type for CHECK constraint: ") + cc.getCheck_expression());
            }
            if (!serdeConstants.BOOLEAN_TYPE_NAME.equals(checkExpr.getTypeInfo().getTypeName())) {
                throw new SemanticException(ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("Only boolean type is supported for CHECK constraint: ") + cc.getCheck_expression() + ". Found: " + checkExpr.getTypeInfo().getTypeName());
            }
            validateCheckExpr(checkExpr);
        } catch (Exception e) {
            throw new SemanticException(ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("Invalid CHECK constraint expression: ") + cc.getCheck_expression() + ". " + e.getMessage());
        }
    }
}
Also used : SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) UnsupportedEncodingException(java.io.UnsupportedEncodingException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException)
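
A hedged usage sketch of the method above: it builds a FieldSchema list for a hypothetical two-column table, fills a SQLCheckConstraint through its Thrift setter, and asks validateCheckConstraint to confirm that the expression is boolean-typed. The table, column names, and expressions are made up for illustration, and the setter name is assumed from the getCheck_expression getter used in the example.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SQLCheckConstraint;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;

public class CheckConstraintValidationSketch {

    public static void main(String[] args) throws Exception {
        // Hypothetical table: (id int, price double).
        List<FieldSchema> cols = Arrays.asList(
                new FieldSchema("id", "int", null),
                new FieldSchema("price", "double", null));

        // Boolean-typed CHECK expression: passes validation.
        SQLCheckConstraint ok = new SQLCheckConstraint();
        ok.setCheck_expression("price > 0");

        // A non-boolean expression such as "price + 1" would instead raise a
        // SemanticException ("Only boolean type is supported for CHECK constraint").

        Configuration conf = new HiveConf();
        BaseSemanticAnalyzer.validateCheckConstraint(cols, Arrays.asList(ok), conf);
    }
}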

Example 38 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class CalcitePlanner method handleInsertStatement.

// This function serves as the wrapper of handleInsertStatementSpec in
// SemanticAnalyzer
Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb) throws SemanticException {
    ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
    for (int i = 0; i < columns.size(); i++) {
        ColumnInfo col = columns.get(i);
        colList.add(new ExprNodeColumnDesc(col));
    }
    ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
    RowResolver out_rwsch = handleInsertStatementSpec(colList, dest, inputRR, inputRR, qb, selExprList);
    ArrayList<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < colList.size(); i++) {
        String outputCol = getColumnInternalName(i);
        colExprMap.put(outputCol, colList.get(i));
        columnNames.add(outputCol);
    }
    Operator<?> output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, columnNames), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
    output.setColumnExprMap(colExprMap);
    return output;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
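
The renaming loop relies on getColumnInternalName, which is assumed here to delegate to HiveConf and produce purely positional names of the form _colN, independent of the source column names. A tiny sketch of that assumption:

import org.apache.hadoop.hive.conf.HiveConf;

public class InternalNameSketch {

    public static void main(String[] args) {
        // Prints _col0, _col1, _col2 - the names the select wrapper assigns
        // to the first three projected columns.
        for (int i = 0; i < 3; i++) {
            System.out.println(HiveConf.getColumnInternalName(i));
        }
    }
}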

Example 39 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class TypeCheckProcFactory method processGByExpr.

/**
 * Function to do group-by subexpression elimination. This is called by all the
 * processors initially. As an example, consider the query
 * select a+b, count(1) from T group by a+b; a+b is already precomputed in the
 * group-by operator's key, so we substitute a+b in the select list with the
 * internal column name of the a+b expression that appears in the input row
 * resolver.
 *
 * @param nd
 *          The node that is being inspected.
 * @param procCtx
 *          The processor context.
 *
 * @return exprNodeColumnDesc.
 */
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) throws SemanticException {
    // We recursively create the exprNodeDesc. Base cases: when we encounter a
    // column ref, we convert that into an exprNodeColumnDesc; when we encounter a
    // constant, we convert that into an exprNodeConstantDesc. For others we just
    // build the exprNodeFuncDesc with recursively built children.
    ASTNode expr = (ASTNode) nd;
    TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
    if (ctx == null) {
        return null;
    }
    // having key in (select .. where a = min(b.value)
    if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
        return null;
    }
    RowResolver input = ctx.getInputRR();
    ExprNodeDesc desc = null;
    if ((input == null) || (!ctx.getAllowGBExprElimination())) {
        return null;
    }
    // If the current subExpression is pre-calculated, as in Group-By etc.
    ColumnInfo colInfo = input.getExpression(expr);
    // try outer row resolver
    RowResolver outerRR = ctx.getOuterRR();
    if (colInfo == null && outerRR != null) {
        colInfo = outerRR.getExpression(expr);
    }
    if (colInfo != null) {
        desc = new ExprNodeColumnDesc(colInfo);
        ASTNode source = input.getExpressionSource(expr);
        if (source != null && ctx.getUnparseTranslator() != null) {
            ctx.getUnparseTranslator().addCopyTranslation(expr, source);
        }
        return desc;
    }
    return desc;
}
Also used : ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
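
To make the substitution concrete, here is a self-contained sketch of the behaviour processGByExpr relies on: when the input RowResolver already maps the AST for "a + b" to a group-by output column, a later occurrence of the same expression is resolved to a column reference instead of being re-evaluated. Class and method calls mirror the examples above; the internal column name "_col0", the int type, and the sketch class name are assumptions.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GByExprEliminationSketch {

    public static void main(String[] args) throws Exception {
        ParseDriver pd = new ParseDriver();

        // The group-by operator has already published "a + b" under _col0.
        ASTNode gbyKey = pd.parseExpression("a + b");
        RowResolver input = new RowResolver();
        input.setIsExprResolver(true);
        input.putExpression(gbyKey, new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "", false));

        // A later occurrence of the same expression in the select list is
        // resolved by expression equality, not recomputed.
        ASTNode selectExpr = pd.parseExpression("a + b");
        ColumnInfo precomputed = input.getExpression(selectExpr);
        if (precomputed != null) {
            ExprNodeColumnDesc desc = new ExprNodeColumnDesc(precomputed);
            System.out.println(desc.getColumn()); // _col0
        }
    }
}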

Example 40 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveGBOpConvUtil method getValueKeysForRS.

/**
 * Get the value keys for the ReduceSink that follows a map-side GroupBy.
 *
 * @param inOp
 *          the map-side GroupByOperator
 * @param outputKeyColumnNames
 * @param colExprMap
 * @return the list of ExprNodeDesc for the values
 * @throws SemanticException
 */
private static ArrayList<ExprNodeDesc> getValueKeysForRS(Operator inOp, int aggStartPos, List<String> outputKeyColumnNames, ArrayList<ColumnInfo> colInfoLst, Map<String, ExprNodeDesc> colExprMap, boolean addEmptyTabAlias, boolean setColToNonVirtual) throws SemanticException {
    List<ColumnInfo> mapGBColInfoLst = inOp.getSchema().getSignature();
    ArrayList<ExprNodeDesc> valueKeys = null;
    if (aggStartPos >= mapGBColInfoLst.size()) {
        valueKeys = new ArrayList<ExprNodeDesc>();
    } else {
        valueKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, aggStartPos, mapGBColInfoLst.size() - 1, true, setColToNonVirtual);
        for (int i = 0; i < valueKeys.size(); ++i) {
            String outputColName = SemanticAnalyzer.getColumnInternalName(i);
            outputKeyColumnNames.add(outputColName);
            // TODO: Verify if this is needed (why can't it always be null/empty?)
            String tabAlias = addEmptyTabAlias ? "" : null;
            ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + outputColName, valueKeys.get(i).getTypeInfo(), tabAlias, false);
            colInfoLst.add(colInfo);
            colExprMap.put(colInfo.getInternalName(), valueKeys.get(i));
        }
    }
    return valueKeys;
}
Also used : ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
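
To see what the loop produces, here is a self-contained sketch (the schema and aggStartPos are invented): the columns from aggStartPos onward in the map-side GB schema are re-labelled positionally and exposed under the VALUE. prefix, which are exactly the internal names the downstream reducer reads.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ValueKeysSketch {

    public static void main(String[] args) {
        // Hypothetical map-side GB output: one group-by key followed by two partial aggregates.
        List<ColumnInfo> mapGBColInfoLst = Arrays.asList(
                new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "", false),
                new ColumnInfo("_col1", TypeInfoFactory.longTypeInfo, "", false),
                new ColumnInfo("_col2", TypeInfoFactory.doubleTypeInfo, "", false));
        int aggStartPos = 1; // aggregates start right after the single key

        List<ColumnInfo> valueColInfos = new ArrayList<ColumnInfo>();
        for (int i = aggStartPos; i < mapGBColInfoLst.size(); i++) {
            // Positional renaming relative to the start of the value section.
            String outputColName = "_col" + (i - aggStartPos);
            valueColInfos.add(new ColumnInfo(
                    Utilities.ReduceField.VALUE.toString() + "." + outputColName,
                    mapGBColInfoLst.get(i).getType(), "", false));
        }

        // Prints VALUE._col0 and VALUE._col1.
        valueColInfos.forEach(ci -> System.out.println(ci.getInternalName()));
    }
}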

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)132 ArrayList (java.util.ArrayList)82 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)79 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)62 HashMap (java.util.HashMap)53 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)49 LinkedHashMap (java.util.LinkedHashMap)41 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)39 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)33 Operator (org.apache.hadoop.hive.ql.exec.Operator)32 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)32 SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint)31 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)31 NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint)31 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)30 SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint)30 SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint)30 CheckConstraint (org.apache.hadoop.hive.ql.metadata.CheckConstraint)30 DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint)30 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)29