Example 11 with TypeCheckCtx

Use of org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx in project hive by apache.

From the class SemanticAnalyzer, method genSelectPlan:

@SuppressWarnings("nls")
private Operator<?> genSelectPlan(String dest, ASTNode selExprList, QB qb, Operator<?> input, Operator<?> inputForSelectStar, boolean outerLV) throws SemanticException {
    LOG.debug("tree: {}", selExprList.toStringTree());
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    RowResolver out_rwsch = new RowResolver();
    ASTNode trfm = null;
    Integer pos = 0;
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    RowResolver starRR = null;
    if (inputForSelectStar != null && inputForSelectStar != input) {
        starRR = opParseCtx.get(inputForSelectStar).getRowResolver();
    }
    // SELECT * or SELECT TRANSFORM(*)
    boolean selectStar = false;
    int posn = 0;
    boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT);
    if (hintPresent) {
        posn++;
    }
    boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM);
    if (isInTransform) {
        queryProperties.setUsesScript(true);
        globalLimitCtx.setHasTransformOrUDTF(true);
        trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
    }
    // Detect queries of the form SELECT udtf(col) AS ...
    // by looking for a function as the first child, and then checking to see
    // if the function is a Generic UDTF. It's not as clean as TRANSFORM due to
    // the lack of a special token.
    boolean isUDTF = false;
    String udtfTableAlias = null;
    List<String> udtfColAliases = new ArrayList<String>();
    ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
    GenericUDTF genericUDTF = null;
    int udtfExprType = udtfExpr.getType();
    if (udtfExprType == HiveParser.TOK_FUNCTION || udtfExprType == HiveParser.TOK_FUNCTIONSTAR) {
        String funcName = TypeCheckProcFactory.getFunctionText(udtfExpr, true);
        FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
        if (fi != null) {
            genericUDTF = fi.getGenericUDTF();
        }
        isUDTF = (genericUDTF != null);
        if (isUDTF) {
            globalLimitCtx.setHasTransformOrUDTF(true);
        }
        if (isUDTF && !fi.isNative()) {
            unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr.getChild(0));
        }
        if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) {
            genExprNodeDescRegex(".*", null, (ASTNode) udtfExpr.getChild(0), colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
        }
    }
    if (isUDTF) {
        // Only support a single expression when it's a UDTF
        if (selExprList.getChildCount() > 1) {
            throw new SemanticException(generateErrorMessage((ASTNode) selExprList.getChild(1), ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg()));
        }
        ASTNode selExpr = (ASTNode) selExprList.getChild(posn);
        // Column names can also be inferred from the result of the UDTF
        for (int i = 1; i < selExpr.getChildCount(); i++) {
            ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
            switch(selExprChild.getType()) {
                case HiveParser.Identifier:
                    udtfColAliases.add(unescapeIdentifier(selExprChild.getText().toLowerCase()));
                    unparseTranslator.addIdentifierTranslation(selExprChild);
                    break;
                case HiveParser.TOK_TABALIAS:
                    assert (selExprChild.getChildCount() == 1);
                    udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0).getText());
                    qb.addAlias(udtfTableAlias);
                    unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild.getChild(0));
                    break;
                default:
                    assert (false);
            }
        }
        LOG.debug("UDTF table alias is {}", udtfTableAlias);
        LOG.debug("UDTF col aliases are {}", udtfColAliases);
    }
    // The list of expressions after SELECT or SELECT TRANSFORM.
    ASTNode exprList;
    if (isInTransform) {
        exprList = (ASTNode) trfm.getChild(0);
    } else if (isUDTF) {
        exprList = udtfExpr;
    } else {
        exprList = selExprList;
    }
    LOG.debug("genSelectPlan: input = {} starRr = {}", inputRR, starRR);
    // For UDTF's, skip the function name to get the expressions
    int startPosn = isUDTF ? posn + 1 : posn;
    if (isInTransform) {
        startPosn = 0;
    }
    final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty() || !qb.getParseInfo().getDestGroupingSets().isEmpty() || !qb.getParseInfo().getDestCubes().isEmpty());
    Set<String> colAliases = new HashSet<String>();
    int offset = 0;
    // Iterate over all expressions (either after SELECT, or in SELECT TRANSFORM)
    for (int i = startPosn; i < exprList.getChildCount(); ++i) {
        // child can be EXPR AS ALIAS, or EXPR.
        ASTNode child = (ASTNode) exprList.getChild(i);
        boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
        boolean isWindowSpec = child.getChildCount() == 3 && child.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC;
        // EXPR AS (ALIAS, ...) parses but is only valid for UDTFs; the TRANSFORM and UDTF ASTs are shaped slightly differently, so the check below skips them.
        if (!isWindowSpec && !isInTransform && !isUDTF && child.getChildCount() > 2) {
            throw new SemanticException(generateErrorMessage((ASTNode) child.getChild(2), ErrorMsg.INVALID_AS.getMsg()));
        }
        // The real expression
        ASTNode expr;
        String tabAlias;
        String colAlias;
        if (isInTransform || isUDTF) {
            tabAlias = null;
            colAlias = autogenColAliasPrfxLbl + i;
            expr = child;
        } else {
            // Get rid of TOK_SELEXPR
            expr = (ASTNode) child.getChild(0);
            String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR, autogenColAliasPrfxIncludeFuncName, i + offset);
            tabAlias = colRef[0];
            colAlias = colRef[1];
            if (hasAsClause) {
                unparseTranslator.addIdentifierTranslation((ASTNode) child.getChild(1));
            }
        }
        colAliases.add(colAlias);
        // The real expression
        if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
            int initPos = pos;
            pos = genExprNodeDescRegex(".*", expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
            if (unparseTranslator.isEnabled()) {
                offset += pos - initPos - 1;
            }
            selectStar = true;
        } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) {
            // In case the expression is a regex COL.
            // This can only happen without AS clause
            // We don't allow this for ExprResolver - the Group By case
            pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
        } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase())) && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) {
            // In case the expression is TABLE.COL (col can be regex).
            // This can only happen without AS clause
            // We don't allow this for ExprResolver - the Group By case
            pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
        } else {
            // Case when this is an expression
            TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, true, isCBOExecuted());
            // We allow stateful functions in the SELECT list (but nowhere else)
            tcCtx.setAllowStatefulFunctions(true);
            tcCtx.setAllowDistinctFunctions(false);
            if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) {
                // If CBO did not optimize the query, the grouping function may need special handling and be rewritten here
                expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr, !cubeRollupGrpSetPresent);
            }
            ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
            String recommended = recommendName(exp, colAlias);
            if (recommended != null && !colAliases.contains(recommended) && out_rwsch.get(null, recommended) == null) {
                colAlias = recommended;
            }
            colList.add(exp);
            ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos), exp.getWritableObjectInspector(), tabAlias, false);
            colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) && ((ExprNodeColumnDesc) exp).isSkewedCol());
            out_rwsch.put(tabAlias, colAlias, colInfo);
            if (exp instanceof ExprNodeColumnDesc) {
                ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
                String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
                if (altMapping != null) {
                    out_rwsch.put(altMapping[0], altMapping[1], colInfo);
                }
            }
            pos++;
        }
    }
    selectStar = selectStar && exprList.getChildCount() == posn + 1;
    out_rwsch = handleInsertStatementSpec(colList, dest, out_rwsch, qb, selExprList);
    List<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < colList.size(); i++) {
        String outputCol = getColumnInternalName(i);
        colExprMap.put(outputCol, colList.get(i));
        columnNames.add(outputCol);
    }
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, columnNames, selectStar), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
    output.setColumnExprMap(colExprMap);
    if (isInTransform) {
        output = genScriptPlan(trfm, qb, output);
    }
    if (isUDTF) {
        output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb, output, outerLV);
    }
    LOG.debug("Created Select Plan row schema: {}", out_rwsch);
    return output;
}
Also used: TypeCheckCtx(org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx), AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator), FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator), FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator), LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator), UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator), SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), LinkedHashMap(java.util.LinkedHashMap), HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), WindowFunctionInfo(org.apache.hadoop.hive.ql.exec.WindowFunctionInfo), FunctionInfo(org.apache.hadoop.hive.ql.exec.FunctionInfo), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), GenericUDTF(org.apache.hadoop.hive.ql.udf.generic.GenericUDTF), SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint), DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint), SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc), CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException), HashSet(java.util.HashSet)
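
For reference, the core TypeCheckCtx pattern in the SELECT branch above can be isolated into a small helper. This is a minimal sketch, not Hive code: the class and method names are hypothetical, the boolean argument is passed exactly as isCBOExecuted() is in the snippet, and translation goes through ExprNodeTypeCheck.genExprNode (the entry point used in Examples 12 and 13) rather than SemanticAnalyzer's internal genExprNodeDesc helper.

import java.util.Map;

import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck;
import org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

class SelectExprTypeCheckSketch {

    static ExprNodeDesc typeCheckSelectExpr(ASTNode expr, RowResolver inputRR, boolean cboExecuted)
            throws SemanticException {
        // Same context configuration as genSelectPlan: the third constructor
        // argument is passed as isCBOExecuted() in the snippet above.
        TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, true, cboExecuted);
        // Stateful UDFs are legal in the SELECT list (but nowhere else);
        // DISTINCT aggregates are rejected here and handled elsewhere.
        tcCtx.setAllowStatefulFunctions(true);
        tcCtx.setAllowDistinctFunctions(false);
        // genExprNode maps each AST node to its ExprNodeDesc; the root
        // node's entry is the translated expression.
        Map<ASTNode, ExprNodeDesc> nodeOutputs = ExprNodeTypeCheck.genExprNode(expr, tcCtx);
        return nodeOutputs.get(expr);
    }
}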

Example 12 with TypeCheckCtx

Use of org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx in project hive by apache.

From the class ShowPartitionAnalyzer, method getShowPartitionsFilter:

@VisibleForTesting
ExprNodeDesc getShowPartitionsFilter(Table table, ASTNode command) throws SemanticException {
    ExprNodeDesc showFilter = null;
    for (int childIndex = 0; childIndex < command.getChildCount(); childIndex++) {
        ASTNode astChild = (ASTNode) command.getChild(childIndex);
        if (astChild.getType() == HiveParser.TOK_WHERE) {
            RowResolver rwsch = new RowResolver();
            Map<String, String> colTypes = new HashMap<String, String>();
            for (FieldSchema fs : table.getPartCols()) {
                rwsch.put(table.getTableName(), fs.getName(), new ColumnInfo(fs.getName(), TypeInfoFactory.stringTypeInfo, null, true));
                colTypes.put(fs.getName().toLowerCase(), fs.getType());
            }
            TypeCheckCtx tcCtx = new TypeCheckCtx(rwsch);
            ASTNode conds = (ASTNode) astChild.getChild(0);
            Map<ASTNode, ExprNodeDesc> nodeOutputs = ExprNodeTypeCheck.genExprNode(conds, tcCtx);
            ExprNodeDesc target = nodeOutputs.get(conds);
            if (!(target instanceof ExprNodeGenericFuncDesc) || !target.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) {
                String errorMsg = tcCtx.getError() != null ? ". " + tcCtx.getError() : "";
                throw new SemanticException("Not a filter expr: " + (target == null ? "null" : target.getExprString()) + errorMsg);
            }
            showFilter = replaceDefaultPartNameAndCastType(target, colTypes, HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME));
        }
    }
    return showFilter;
}
Also used: TypeCheckCtx(org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx), HashMap(java.util.HashMap), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver), ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException), VisibleForTesting(com.google.common.annotations.VisibleForTesting)
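
The validation above reduces to: type-check the WHERE subtree against a RowResolver seeded with the partition columns, then require the result to be a boolean-typed function expression. A minimal sketch of just that check, with hypothetical class and method names:

import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck;
import org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

class PartitionFilterSketch {

    static ExprNodeGenericFuncDesc toBooleanFilter(ASTNode conds, RowResolver rwsch)
            throws SemanticException {
        // The no-frills constructor is enough here; no CBO or stateful-UDF
        // toggles are involved, matching the snippet above.
        TypeCheckCtx tcCtx = new TypeCheckCtx(rwsch);
        ExprNodeDesc target = ExprNodeTypeCheck.genExprNode(conds, tcCtx).get(conds);
        // A valid SHOW PARTITIONS filter must be a boolean function expression,
        // e.g. (ds = '2024-01-01'); bare columns or constants are rejected.
        if (!(target instanceof ExprNodeGenericFuncDesc)
                || !target.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) {
            String errorMsg = tcCtx.getError() != null ? ". " + tcCtx.getError() : "";
            throw new SemanticException("Not a filter expr: "
                    + (target == null ? "null" : target.getExprString()) + errorMsg);
        }
        return (ExprNodeGenericFuncDesc) target;
    }
}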

Example 13 with TypeCheckCtx

Use of org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx in project hive by apache.

From the class ShowPartitionAnalyzer, method getShowPartitionsOrder:

private String getShowPartitionsOrder(Table table, ASTNode command) throws SemanticException {
    String orderBy = null;
    for (int childIndex = 0; childIndex < command.getChildCount(); childIndex++) {
        ASTNode astChild = (ASTNode) command.getChild(childIndex);
        if (astChild.getType() == HiveParser.TOK_ORDERBY) {
            Map<String, Integer> poses = new HashMap<String, Integer>();
            RowResolver rwsch = new RowResolver();
            for (int i = 0; i < table.getPartCols().size(); i++) {
                FieldSchema fs = table.getPartCols().get(i);
                rwsch.put(table.getTableName(), fs.getName(), new ColumnInfo(fs.getName(), TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()), null, true));
                poses.put(fs.getName().toLowerCase(), i);
            }
            TypeCheckCtx tcCtx = new TypeCheckCtx(rwsch);
            StringBuilder colIndices = new StringBuilder();
            StringBuilder order = new StringBuilder();
            int ccount = astChild.getChildCount();
            for (int i = 0; i < ccount; ++i) {
                // TODO: implement NULLS FIRST / NULLS LAST handling
                ASTNode cl = (ASTNode) astChild.getChild(i);
                if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) {
                    order.append("+");
                    cl = (ASTNode) cl.getChild(0).getChild(0);
                } else if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEDESC) {
                    order.append("-");
                    cl = (ASTNode) cl.getChild(0).getChild(0);
                } else {
                    order.append("+");
                }
                Map<ASTNode, ExprNodeDesc> nodeOutputs = ExprNodeTypeCheck.genExprNode(cl, tcCtx);
                ExprNodeDesc desc = nodeOutputs.get(cl);
                if (!(desc instanceof ExprNodeColumnDesc)) {
                    throw new SemanticException("Only partition keys are allowed for " + "sorting partition names, input: " + cl.toStringTree());
                }
                String col = ((ExprNodeColumnDesc) desc).getColumn().toLowerCase();
                colIndices.append(poses.get(col)).append(",");
            }
            colIndices.setLength(colIndices.length() - 1);
            orderBy = colIndices + ":" + order;
        }
    }
    return orderBy;
}
Also used: TypeCheckCtx(org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx), HashMap(java.util.HashMap), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver), ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode), ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
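
The returned string encodes column positions and sort directions as "colIndices:order"; for example, ORDER BY ds DESC, hr over partition columns (ds, hr) yields "0,1:-+". A hypothetical decoder, included only to document that encoding (it is not part of Hive):

import java.util.List;

class ShowPartitionsOrderSketch {

    static void printOrder(String orderBy, List<String> partCols) {
        // e.g. orderBy = "0,1:-+" for ORDER BY ds DESC, hr over (ds, hr)
        String[] parts = orderBy.split(":");
        String[] indices = parts[0].split(",");
        for (int i = 0; i < indices.length; i++) {
            String col = partCols.get(Integer.parseInt(indices[i]));
            char dir = parts[1].charAt(i); // '+' = ascending, '-' = descending
            System.out.println(col + (dir == '-' ? " DESC" : " ASC"));
        }
    }
}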

Aggregations

TypeCheckCtx (org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx) - 13 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) - 7 uses
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) - 6 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) - 5 uses
HashMap (java.util.HashMap) - 4 uses
ASTNode (org.apache.hadoop.hive.ql.parse.ASTNode) - 4 uses
RowResolver (org.apache.hadoop.hive.ql.parse.RowResolver) - 4 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) - 4 uses
ArrayList (java.util.ArrayList) - 3 uses
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) - 3 uses
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) - 3 uses
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) - 3 uses
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) - 3 uses
HashSet (java.util.HashSet) - 2 uses
LinkedHashMap (java.util.LinkedHashMap) - 2 uses
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) - 2 uses
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) - 2 uses
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) - 2 uses
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint) - 2 uses
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) - 2 uses