Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class SemanticAnalyzer, the method genScriptPlan:
@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) throws SemanticException {
// If there is no "AS" clause, the output schema will be "key,value"
ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
int inputSerDeNum = 1, inputRecordWriterNum = 2;
int outputSerDeNum = 4, outputRecordReaderNum = 5;
int outputColsNum = 6;
boolean outputColNames = false, outputColSchemas = false;
int execPos = 3;
boolean defaultOutputCols = false;
// Go over all the children
if (trfm.getChildCount() > outputColsNum) {
ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
outputColNames = true;
} else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
outputColSchemas = true;
}
}
// If column type is not specified, use a string
if (!outputColNames && !outputColSchemas) {
String intName = getColumnInternalName(0);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("key");
outputCols.add(colInfo);
intName = getColumnInternalName(1);
colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("value");
outputCols.add(colInfo);
defaultOutputCols = true;
} else {
ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
int ccount = collist.getChildCount();
Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
if (outputColNames) {
for (int i = 0; i < ccount; ++i) {
String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i)).getText()).toLowerCase();
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
} else {
for (int i = 0; i < ccount; ++i) {
ASTNode child = (ASTNode) collist.getChild(i);
assert child.getType() == HiveParser.TOK_TABCOL;
String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0)).getText()).toLowerCase();
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child.getChild(1))), null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
}
}
RowResolver out_rwsch = new RowResolver();
StringBuilder columns = new StringBuilder();
StringBuilder columnTypes = new StringBuilder();
for (int i = 0; i < outputCols.size(); ++i) {
if (i != 0) {
columns.append(",");
columnTypes.append(",");
}
columns.append(outputCols.get(i).getInternalName());
columnTypes.append(outputCols.get(i).getType().getTypeName());
out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(), outputCols.get(i));
}
StringBuilder inpColumns = new StringBuilder();
StringBuilder inpColumnTypes = new StringBuilder();
ArrayList<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver().getColumnInfos();
for (int i = 0; i < inputSchema.size(); ++i) {
if (i != 0) {
inpColumns.append(",");
inpColumnTypes.append(",");
}
inpColumns.append(inputSchema.get(i).getInternalName());
inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
}
TableDesc outInfo;
TableDesc errInfo;
TableDesc inInfo;
String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
Class<? extends Deserializer> serde;
try {
serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName, true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
int fieldSeparator = Utilities.tabCode;
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
fieldSeparator = Utilities.ctrlaCode;
}
// Input and Output Serdes
if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(), inpColumnTypes.toString(), false);
} else {
inInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator), inpColumns.toString(), inpColumnTypes.toString(), false, true);
}
if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(outputSerDeNum))).getChild(0), columns.toString(), columnTypes.toString(), false);
// This is for backward compatibility. If the user did not specify the
// output column list, we assume that there are 2 columns: key and value.
// However, if the script outputs: col1, col2, col3 separated by TAB, the
// requirement is: key is col1 and value is (col2 TAB col3)
} else {
outInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator), columns.toString(), columnTypes.toString(), defaultOutputCols);
}
// Error stream always uses the default serde with a single column
errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
// Output record readers
Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm.getChild(outputRecordReaderNum));
Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm.getChild(inputRecordWriterNum));
Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new ScriptDesc(fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
// disable backtracking
output.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
// Add URI entity for the transform script. The script is assumed to be local unless downloadable
if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) {
String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText()));
getInputs().add(new ReadEntity(new Path(scriptCmd), ResourceDownloader.isFileUri(scriptCmd)));
}
return output;
}
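The RowSchema in this method is derived from the RowResolver that was populated with the script's output columns: the resolver's ColumnInfo list becomes the schema handed to OperatorFactory.getAndMakeChild. A minimal, standalone sketch of that pattern (the aliases "src" and "key" are placeholders, not taken from the method above):
// Uses RowResolver from org.apache.hadoop.hive.ql.parse plus ColumnInfo, RowSchema and TypeInfoFactory.
RowResolver outputRR = new RowResolver();
ColumnInfo keyCol = new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, null, false);
keyCol.setAlias("key");
outputRR.put("src", "key", keyCol);
// The schema passed to OperatorFactory.getAndMakeChild is just the resolver's ColumnInfo list.
RowSchema scriptSchema = new RowSchema(outputRR.getColumnInfos());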
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class SemanticAnalyzer, the method genMapGroupByForSemijoin:
private Operator genMapGroupByForSemijoin(QB qb, ArrayList<ASTNode> fields, Operator<?> input, GroupByDesc.Mode mode) throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(input).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < fields.size(); ++i) {
// get the group by keys to ColumnInfo
ASTNode colName = fields.get(i);
String[] nm;
String[] nm2;
ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
if (grpByExprNode instanceof ExprNodeColumnDesc) {
// In most of the cases, this is a column reference
ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) grpByExprNode;
nm = groupByInputRowResolver.reverseLookup(columnExpr.getColumn());
nm2 = groupByInputRowResolver.getAlternateMappings(columnExpr.getColumn());
} else if (grpByExprNode instanceof ExprNodeConstantDesc) {
// However, it can be a constant too. In that case, we need to track
// the column that it originated from in the input operator so we can
// propagate the aliases.
ExprNodeConstantDesc constantExpr = (ExprNodeConstantDesc) grpByExprNode;
String inputCol = constantExpr.getFoldedFromCol();
nm = groupByInputRowResolver.reverseLookup(inputCol);
nm2 = groupByInputRowResolver.getAlternateMappings(inputCol);
} else {
// Neither a simple column reference nor a constant: skip the map-side group-by and return the input of the left semijoin
return input;
}
groupByKeys.add(grpByExprNode);
// generate output column names
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
groupByOutputRowResolver.put(nm[0], nm[1], colInfo2);
if (nm2 != null) {
groupByOutputRowResolver.addMappingOnly(nm2[0], nm2[1], colInfo2);
}
groupByOutputRowResolver.putExpression(colName, colInfo2);
// establish mapping from the output column to the input column
colExprMap.put(field, grpByExprNode);
}
// Generate group-by operator
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, -1, false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), input), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
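As above, the RowSchema handed to the group-by operator is simply the output RowResolver's ColumnInfo list, with one internal name per group-by key. A rough fragment of that key-to-schema mapping, assuming two keys of known types (the "_col" prefix mirrors what getColumnInternalName produces):
// Uses ColumnInfo, RowSchema, TypeInfoFactory and TypeInfo from the Hive ql/serde2 packages.
List<TypeInfo> keyTypes = Arrays.asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
ArrayList<ColumnInfo> gbyCols = new ArrayList<ColumnInfo>();
for (int i = 0; i < keyTypes.size(); i++) {
  // One output column per group-by key; the tab alias is left empty, as in the method above.
  gbyCols.add(new ColumnInfo("_col" + i, keyTypes.get(i), "", false));
}
RowSchema gbySchema = new RowSchema(gbyCols);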
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class SemanticAnalyzer, the method genSelectPlan:
@SuppressWarnings("nls")
private Operator<?> genSelectPlan(String dest, ASTNode selExprList, QB qb, Operator<?> input, Operator<?> inputForSelectStar, boolean outerLV) throws SemanticException {
if (LOG.isDebugEnabled()) {
LOG.debug("tree: " + selExprList.toStringTree());
}
ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>();
RowResolver out_rwsch = new RowResolver();
ASTNode trfm = null;
Integer pos = Integer.valueOf(0);
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
RowResolver starRR = null;
if (inputForSelectStar != null && inputForSelectStar != input) {
starRR = opParseCtx.get(inputForSelectStar).getRowResolver();
}
// SELECT * or SELECT TRANSFORM(*)
boolean selectStar = false;
int posn = 0;
boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT);
if (hintPresent) {
posn++;
}
boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM);
if (isInTransform) {
queryProperties.setUsesScript(true);
globalLimitCtx.setHasTransformOrUDTF(true);
trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
}
// Detect queries of the form SELECT udtf(col) AS ...
// by looking for a function as the first child, and then checking to see
// if the function is a Generic UDTF. It's not as clean as TRANSFORM due to
// the lack of a special token.
boolean isUDTF = false;
String udtfTableAlias = null;
ArrayList<String> udtfColAliases = new ArrayList<String>();
ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
GenericUDTF genericUDTF = null;
int udtfExprType = udtfExpr.getType();
if (udtfExprType == HiveParser.TOK_FUNCTION || udtfExprType == HiveParser.TOK_FUNCTIONSTAR) {
String funcName = TypeCheckProcFactory.DefaultExprProcessor.getFunctionText(udtfExpr, true);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
if (fi != null) {
genericUDTF = fi.getGenericUDTF();
}
isUDTF = (genericUDTF != null);
if (isUDTF) {
globalLimitCtx.setHasTransformOrUDTF(true);
}
if (isUDTF && !fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr.getChild(0));
}
if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) {
genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0), col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
}
}
if (isUDTF) {
// Only support a single expression when it's a UDTF
if (selExprList.getChildCount() > 1) {
throw new SemanticException(generateErrorMessage((ASTNode) selExprList.getChild(1), ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg()));
}
ASTNode selExpr = (ASTNode) selExprList.getChild(posn);
// column names can also be inferred from the result of the UDTF
for (int i = 1; i < selExpr.getChildCount(); i++) {
ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
switch(selExprChild.getType()) {
case HiveParser.Identifier:
udtfColAliases.add(unescapeIdentifier(selExprChild.getText().toLowerCase()));
unparseTranslator.addIdentifierTranslation(selExprChild);
break;
case HiveParser.TOK_TABALIAS:
assert (selExprChild.getChildCount() == 1);
udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0).getText());
qb.addAlias(udtfTableAlias);
unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild.getChild(0));
break;
default:
assert (false);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("UDTF table alias is " + udtfTableAlias);
LOG.debug("UDTF col aliases are " + udtfColAliases);
}
}
// The list of expressions after SELECT or SELECT TRANSFORM.
ASTNode exprList;
if (isInTransform) {
exprList = (ASTNode) trfm.getChild(0);
} else if (isUDTF) {
exprList = udtfExpr;
} else {
exprList = selExprList;
}
if (LOG.isDebugEnabled()) {
LOG.debug("genSelectPlan: input = " + inputRR + " starRr = " + starRR);
}
// For UDTF's, skip the function name to get the expressions
int startPosn = isUDTF ? posn + 1 : posn;
if (isInTransform) {
startPosn = 0;
}
final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty() || !qb.getParseInfo().getDestGroupingSets().isEmpty() || !qb.getParseInfo().getDestCubes().isEmpty());
Set<String> colAliases = new HashSet<String>();
ASTNode[] exprs = new ASTNode[exprList.getChildCount()];
String[][] aliases = new String[exprList.getChildCount()][];
boolean[] hasAsClauses = new boolean[exprList.getChildCount()];
int offset = 0;
// Iterate over all expressions (either after SELECT, or in SELECT TRANSFORM)
for (int i = startPosn; i < exprList.getChildCount(); ++i) {
// child can be EXPR AS ALIAS, or EXPR.
ASTNode child = (ASTNode) exprList.getChild(i);
boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
boolean isWindowSpec = child.getChildCount() == 3 && child.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC;
// AST's are slightly different.
if (!isWindowSpec && !isInTransform && !isUDTF && child.getChildCount() > 2) {
throw new SemanticException(generateErrorMessage((ASTNode) child.getChild(2), ErrorMsg.INVALID_AS.getMsg()));
}
// The real expression
ASTNode expr;
String tabAlias;
String colAlias;
if (isInTransform || isUDTF) {
tabAlias = null;
colAlias = autogenColAliasPrfxLbl + i;
expr = child;
} else {
// Get rid of TOK_SELEXPR
expr = (ASTNode) child.getChild(0);
String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR, autogenColAliasPrfxIncludeFuncName, i + offset);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
unparseTranslator.addIdentifierTranslation((ASTNode) child.getChild(1));
}
}
exprs[i] = expr;
aliases[i] = new String[] { tabAlias, colAlias };
hasAsClauses[i] = hasAsClause;
colAliases.add(colAlias);
// The real expression
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
int initPos = pos;
pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
if (unparseTranslator.isEnabled()) {
offset += pos - initPos - 1;
}
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) {
// In case the expression is a regex COL.
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
} else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase())) && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr, col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
} else {
// Case when this is an expression
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, true, isCBOExecuted());
// We allow stateful functions in the SELECT list (but nowhere else)
tcCtx.setAllowStatefulFunctions(true);
tcCtx.setAllowDistinctFunctions(false);
if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) {
// If CBO did not optimize the query, we might need to replace grouping function
// Special handling of grouping function
expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr, !cubeRollupGrpSetPresent);
}
ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
String recommended = recommendName(exp, colAlias);
if (recommended != null && !colAliases.contains(recommended) && out_rwsch.get(null, recommended) == null) {
colAlias = recommended;
}
col_list.add(exp);
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos), exp.getWritableObjectInspector(), tabAlias, false);
colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp).isSkewedCol() : false);
out_rwsch.put(tabAlias, colAlias, colInfo);
if (exp instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
if (altMapping != null) {
out_rwsch.put(altMapping[0], altMapping[1], colInfo);
}
}
pos = Integer.valueOf(pos.intValue() + 1);
}
}
selectStar = selectStar && exprList.getChildCount() == posn + 1;
out_rwsch = handleInsertStatementSpec(col_list, dest, out_rwsch, inputRR, qb, selExprList);
ArrayList<String> columnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < col_list.size(); i++) {
String outputCol = getColumnInternalName(i);
colExprMap.put(outputCol, col_list.get(i));
columnNames.add(outputCol);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(col_list, columnNames, selectStar), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(colExprMap);
if (isInTransform) {
output = genScriptPlan(trfm, qb, output);
}
if (isUDTF) {
output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb, output, outerLV);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan row schema: " + out_rwsch.toString());
}
return output;
}
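The select plan follows the same recipe: one internal column name per select expression, a column-to-expression map for the operator, and a RowSchema built from the resolver's ColumnInfos. A simplified fragment of that bookkeeping, using a single hypothetical column "name" from table alias "t":
// Uses ExprNodeDesc/ExprNodeColumnDesc from org.apache.hadoop.hive.ql.plan, plus ColumnInfo, RowSchema, TypeInfoFactory.
ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
colList.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "name", "t", false));
ArrayList<String> columnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
ArrayList<ColumnInfo> outCols = new ArrayList<ColumnInfo>();
for (int i = 0; i < colList.size(); i++) {
  String internalName = "_col" + i;
  columnNames.add(internalName);
  // Map the internal name back to its expression and record the matching ColumnInfo.
  colExprMap.put(internalName, colList.get(i));
  outCols.add(new ColumnInfo(internalName, colList.get(i).getTypeInfo(), "t", false));
}
RowSchema selectSchema = new RowSchema(outCols);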
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class GenSparkSkewJoinProcessor, the method processSkewJoin:
@SuppressWarnings("unchecked")
public static void processSkewJoin(JoinOperator joinOp, Task<? extends Serializable> currTask, ReduceWork reduceWork, ParseContext parseCtx) throws SemanticException {
SparkWork currentWork = ((SparkTask) currTask).getWork();
if (currentWork.getChildren(reduceWork).size() > 0) {
LOG.warn("Skip runtime skew join as the ReduceWork has child work and hasn't been split.");
return;
}
List<Task<? extends Serializable>> children = currTask.getChildTasks();
Path baseTmpDir = parseCtx.getContext().getMRTmpPath();
JoinDesc joinDescriptor = joinOp.getConf();
Map<Byte, List<ExprNodeDesc>> joinValues = joinDescriptor.getExprs();
int numAliases = joinValues.size();
Map<Byte, Path> bigKeysDirMap = new HashMap<Byte, Path>();
Map<Byte, Map<Byte, Path>> smallKeysDirMap = new HashMap<Byte, Map<Byte, Path>>();
Map<Byte, Path> skewJoinJobResultsDir = new HashMap<Byte, Path>();
Byte[] tags = joinDescriptor.getTagOrder();
// for each joining table, set dir for big key and small keys properly
for (int i = 0; i < numAliases; i++) {
Byte alias = tags[i];
bigKeysDirMap.put(alias, GenMRSkewJoinProcessor.getBigKeysDir(baseTmpDir, alias));
Map<Byte, Path> smallKeysMap = new HashMap<Byte, Path>();
smallKeysDirMap.put(alias, smallKeysMap);
for (Byte src2 : tags) {
if (!src2.equals(alias)) {
smallKeysMap.put(src2, GenMRSkewJoinProcessor.getSmallKeysDir(baseTmpDir, alias, src2));
}
}
skewJoinJobResultsDir.put(alias, GenMRSkewJoinProcessor.getBigKeysSkewJoinResultDir(baseTmpDir, alias));
}
joinDescriptor.setHandleSkewJoin(true);
joinDescriptor.setBigKeysDirMap(bigKeysDirMap);
joinDescriptor.setSmallKeysDirMap(smallKeysDirMap);
joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVESKEWJOINKEY));
// create proper table/column desc for spilled tables
TableDesc keyTblDesc = (TableDesc) reduceWork.getKeyDesc().clone();
List<String> joinKeys = Utilities.getColumnNames(keyTblDesc.getProperties());
List<String> joinKeyTypes = Utilities.getColumnTypes(keyTblDesc.getProperties());
Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
Map<Byte, RowSchema> rowSchemaList = new HashMap<Byte, RowSchema>();
Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
// used to create the mapJoinDesc; entries must be in tag order
List<TableDesc> newJoinValueTblDesc = new ArrayList<TableDesc>();
for (int i = 0; i < tags.length; i++) {
newJoinValueTblDesc.add(null);
}
for (int i = 0; i < numAliases; i++) {
Byte alias = tags[i];
List<ExprNodeDesc> valueCols = joinValues.get(alias);
String colNames = "";
String colTypes = "";
int columnSize = valueCols.size();
List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();
ArrayList<ColumnInfo> columnInfos = new ArrayList<ColumnInfo>();
boolean first = true;
for (int k = 0; k < columnSize; k++) {
TypeInfo type = valueCols.get(k).getTypeInfo();
// any name, it does not matter.
String newColName = i + "_VALUE_" + k;
ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false);
columnInfos.add(columnInfo);
newValueExpr.add(new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), false));
if (!first) {
colNames = colNames + ",";
colTypes = colTypes + ",";
}
first = false;
colNames = colNames + newColName;
colTypes = colTypes + valueCols.get(k).getTypeString();
}
// the join keys go in the last part of the spilled table
for (int k = 0; k < joinKeys.size(); k++) {
if (!first) {
colNames = colNames + ",";
colTypes = colTypes + ",";
}
first = false;
colNames = colNames + joinKeys.get(k);
colTypes = colTypes + joinKeyTypes.get(k);
ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory.getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false);
columnInfos.add(columnInfo);
newKeyExpr.add(new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), false));
}
newJoinValues.put(alias, newValueExpr);
newJoinKeys.put(alias, newKeyExpr);
tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));
rowSchemaList.put(alias, new RowSchema(columnInfos));
// construct value table Desc
String valueColNames = "";
String valueColTypes = "";
first = true;
for (int k = 0; k < columnSize; k++) {
// any name, it does not matter.
String newColName = i + "_VALUE_" + k;
if (!first) {
valueColNames = valueColNames + ",";
valueColTypes = valueColTypes + ",";
}
valueColNames = valueColNames + newColName;
valueColTypes = valueColTypes + valueCols.get(k).getTypeString();
first = false;
}
newJoinValueTblDesc.set((byte) i, Utilities.getTableDesc(valueColNames, valueColTypes));
}
joinDescriptor.setSkewKeysValuesTables(tableDescList);
joinDescriptor.setKeyTableDesc(keyTblDesc);
// create N-1 map join tasks
HashMap<Path, Task<? extends Serializable>> bigKeysDirToTaskMap = new HashMap<Path, Task<? extends Serializable>>();
List<Serializable> listWorks = new ArrayList<Serializable>();
List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
for (int i = 0; i < numAliases - 1; i++) {
Byte src = tags[i];
HiveConf hiveConf = new HiveConf(parseCtx.getConf(), GenSparkSkewJoinProcessor.class);
SparkWork sparkWork = new SparkWork(parseCtx.getConf().getVar(HiveConf.ConfVars.HIVEQUERYID));
Task<? extends Serializable> skewJoinMapJoinTask = TaskFactory.get(sparkWork);
skewJoinMapJoinTask.setFetchSource(currTask.isFetchSource());
// create N TableScans
Operator<? extends OperatorDesc>[] parentOps = new TableScanOperator[tags.length];
for (int k = 0; k < tags.length; k++) {
Operator<? extends OperatorDesc> ts = GenMapRedUtils.createTemporaryTableScanOperator(joinOp.getCompilationOpContext(), rowSchemaList.get((byte) k));
((TableScanOperator) ts).setTableDescSkewJoin(tableDescList.get((byte) k));
parentOps[k] = ts;
}
// create the MapJoinOperator
String dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc, joinDescriptor.getOutputColumnNames(), i, joinDescriptor.getConds(), joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix, joinDescriptor.getMemoryMonitorInfo(), joinDescriptor.getInMemoryDataSize());
mapJoinDescriptor.setTagOrder(tags);
mapJoinDescriptor.setHandleSkewJoin(false);
mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());
mapJoinDescriptor.setColumnExprMap(joinDescriptor.getColumnExprMap());
// temporarily, mark it as child of all the TS
MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(joinOp.getCompilationOpContext(), mapJoinDescriptor, null, parentOps);
// clone the original join operator, and replace it with the MJ
// this makes sure MJ has the same downstream operator plan as the original join
List<Operator<?>> reducerList = new ArrayList<Operator<?>>();
reducerList.add(reduceWork.getReducer());
Operator<? extends OperatorDesc> reducer = SerializationUtilities.cloneOperatorTree(reducerList).get(0);
Preconditions.checkArgument(reducer instanceof JoinOperator, "Reducer should be join operator, but actually is " + reducer.getName());
JoinOperator cloneJoinOp = (JoinOperator) reducer;
List<Operator<? extends OperatorDesc>> childOps = cloneJoinOp.getChildOperators();
for (Operator<? extends OperatorDesc> childOp : childOps) {
childOp.replaceParent(cloneJoinOp, mapJoinOp);
}
mapJoinOp.setChildOperators(childOps);
// set memory usage for the MJ operator
setMemUsage(mapJoinOp, skewJoinMapJoinTask, parseCtx);
// create N MapWorks and add them to the SparkWork
MapWork bigMapWork = null;
Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);
for (int j = 0; j < tags.length; j++) {
MapWork mapWork = PlanUtils.getMapRedWork().getMapWork();
sparkWork.add(mapWork);
// This code has only been added for testing
boolean mapperCannotSpanPartns = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
mapWork.setMapperCannotSpanPartns(mapperCannotSpanPartns);
Operator<? extends OperatorDesc> tableScan = parentOps[j];
String alias = tags[j].toString();
ArrayList<String> aliases = new ArrayList<String>();
aliases.add(alias);
Path path;
if (j == i) {
path = bigKeysDirMap.get(tags[j]);
bigKeysDirToTaskMap.put(path, skewJoinMapJoinTask);
bigMapWork = mapWork;
} else {
path = smallTblDirs.get(tags[j]);
}
mapWork.addPathToAlias(path, aliases);
mapWork.getAliasToWork().put(alias, tableScan);
PartitionDesc partitionDesc = new PartitionDesc(tableDescList.get(tags[j]), null);
mapWork.addPathToPartitionInfo(path, partitionDesc);
mapWork.getAliasToPartnInfo().put(alias, partitionDesc);
mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
}
// connect all small dir map work to the big dir map work
Preconditions.checkArgument(bigMapWork != null, "Haven't identified big dir MapWork");
// these 2 flags are intended only for the big-key map work
bigMapWork.setNumMapTasks(HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVESKEWJOINMAPJOINNUMMAPTASK));
bigMapWork.setMinSplitSize(HiveConf.getLongVar(hiveConf, HiveConf.ConfVars.HIVESKEWJOINMAPJOINMINSPLIT));
// use HiveInputFormat so that we can control the number of map tasks
bigMapWork.setInputformat(HiveInputFormat.class.getName());
for (BaseWork work : sparkWork.getRoots()) {
Preconditions.checkArgument(work instanceof MapWork, "All root work should be MapWork, but got " + work.getClass().getSimpleName());
if (work != bigMapWork) {
sparkWork.connect(work, bigMapWork, new SparkEdgeProperty(SparkEdgeProperty.SHUFFLE_NONE));
}
}
// insert SparkHashTableSink and Dummy operators
for (int j = 0; j < tags.length; j++) {
if (j != i) {
insertSHTS(tags[j], (TableScanOperator) parentOps[j], bigMapWork);
}
}
listWorks.add(skewJoinMapJoinTask.getWork());
listTasks.add(skewJoinMapJoinTask);
}
if (children != null) {
for (Task<? extends Serializable> tsk : listTasks) {
for (Task<? extends Serializable> oldChild : children) {
tsk.addDependentTask(oldChild);
}
}
currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
for (Task<? extends Serializable> oldChild : children) {
oldChild.getParentTasks().remove(currTask);
}
listTasks.addAll(children);
for (Task<? extends Serializable> oldChild : children) {
listWorks.add(oldChild.getWork());
}
}
ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx context = new ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx(bigKeysDirToTaskMap, children);
ConditionalWork cndWork = new ConditionalWork(listWorks);
ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork);
cndTsk.setListTasks(listTasks);
cndTsk.setResolver(new ConditionalResolverSkewJoin());
cndTsk.setResolverCtx(context);
currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
currTask.addDependentTask(cndTsk);
}
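Here RowSchema is used per join alias: one schema per tag, stored in rowSchemaList and later handed to GenMapRedUtils.createTemporaryTableScanOperator for the temporary table scans. A condensed fragment of that per-tag construction, with the value-column naming convention from the loop above and an assumed two-way join:
// Uses ColumnInfo, RowSchema and TypeInfoFactory as in the sketches above.
Map<Byte, RowSchema> rowSchemas = new HashMap<Byte, RowSchema>();
for (byte tag = 0; tag < 2; tag++) {
  ArrayList<ColumnInfo> cols = new ArrayList<ColumnInfo>();
  // Internal names follow the "<tag>_VALUE_<k>" pattern used for the spilled value columns.
  cols.add(new ColumnInfo(tag + "_VALUE_0", TypeInfoFactory.stringTypeInfo, Byte.toString(tag), false));
  rowSchemas.put(tag, new RowSchema(cols));
}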
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class ColumnStatsAutoGatherContext, the method replaceSelectOperatorProcess:
/**
* @param operator : the select operator in the analyze statement
* @param input : the operator right before FS in the insert overwrite statement
* @throws HiveException
*/
private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? extends OperatorDesc> input) throws HiveException {
RowSchema selRS = operator.getSchema();
ArrayList<ColumnInfo> signature = new ArrayList<>();
OpParseContext inputCtx = sa.opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
ArrayList<String> columnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
// 1. deal with non-partition columns
for (int i = 0; i < this.columns.size(); i++) {
ColumnInfo col = columns.get(i);
ExprNodeDesc exprNodeDesc = new ExprNodeColumnDesc(col);
colList.add(exprNodeDesc);
String internalName = selRS.getColumnNames().get(i);
columnNames.add(internalName);
columnExprMap.put(internalName, exprNodeDesc);
signature.add(selRS.getSignature().get(i));
}
// if there is any partition column (in static partition or dynamic
// partition or mixed case)
int dynamicPartBegin = -1;
for (int i = 0; i < partitionColumns.size(); i++) {
ExprNodeDesc exprNodeDesc = null;
String partColName = partitionColumns.get(i).getName();
// 2. deal with static partition columns
if (partSpec != null && partSpec.containsKey(partColName) && partSpec.get(partColName) != null) {
if (dynamicPartBegin > 0) {
throw new SemanticException("Dynamic partition columns should not come before static partition columns.");
}
exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName));
TypeInfo srcType = exprNodeDesc.getTypeInfo();
TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
if (!srcType.equals(destType)) {
// This may be possible when srcType is string but destType is integer
exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
}
} else // 3. dynamic partition columns
{
dynamicPartBegin++;
ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin);
TypeInfo srcType = col.getType();
TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
exprNodeDesc = new ExprNodeColumnDesc(col);
if (!srcType.equals(destType)) {
exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
}
}
colList.add(exprNodeDesc);
String internalName = selRS.getColumnNames().get(this.columns.size() + i);
columnNames.add(internalName);
columnExprMap.put(internalName, exprNodeDesc);
signature.add(selRS.getSignature().get(this.columns.size() + i));
}
operator.setConf(new SelectDesc(colList, columnNames));
operator.setColumnExprMap(columnExprMap);
selRS.setSignature(signature);
operator.setSchema(selRS);
}
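In this last example the RowSchema is not created from scratch: the SelectOperator's existing schema is fetched with getSchema(), a new signature (ColumnInfo list) is assembled, and setSignature() swaps it in before the schema is set back on the operator. A minimal fragment of that in-place update, using two placeholder columns:
// Uses ColumnInfo, RowSchema and TypeInfoFactory as above.
ArrayList<ColumnInfo> existingCols = new ArrayList<ColumnInfo>();
existingCols.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, null, false));
existingCols.add(new ColumnInfo("_col1", TypeInfoFactory.intTypeInfo, null, false));
RowSchema selSchema = new RowSchema(existingCols);
// Keep only the first column by replacing the schema's signature in place.
ArrayList<ColumnInfo> newSignature = new ArrayList<ColumnInfo>(existingCols.subList(0, 1));
selSchema.setSignature(newSignature);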