
Example 71 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genSelectAllDesc.

private Operator genSelectAllDesc(Operator input) {
    OpParseContext inputCtx = opParseCtx.get(input);
    RowResolver inputRR = inputCtx.getRowResolver();
    List<ColumnInfo> columns = inputRR.getColumnInfos();
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    List<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    for (ColumnInfo col : columns) {
        colList.add(new ExprNodeColumnDesc(col, true));
        columnNames.add(col.getInternalName());
        columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col, true));
    }
    RowResolver outputRR = inputRR.duplicate();
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, columnNames, true), outputRR.getRowSchema(), input), outputRR);
    output.setColumnExprMap(columnExprMap);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
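
The loop in genSelectAllDesc is the recurring identity-projection idiom in SemanticAnalyzer: every ColumnInfo of the input row becomes one ExprNodeColumnDesc, registered in the expression list, the output column names, and the column-to-expression map. Below is a minimal sketch of just that mapping, assuming the Hive classes listed above are on the classpath; the helper class and method names are hypothetical, and the sketch reuses a single ExprNodeColumnDesc per column purely to stay short.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

// Hypothetical helper: builds the three structures an identity SELECT needs
// (expression list, output column names, internal-name -> expression map).
final class IdentityProjectionSketch {
    static Map<String, ExprNodeDesc> project(List<ColumnInfo> columns,
                                             List<ExprNodeDesc> colList,
                                             List<String> columnNames) {
        Map<String, ExprNodeDesc> columnExprMap = new HashMap<>();
        for (ColumnInfo col : columns) {
            // The boolean flag mirrors its use in the snippet above.
            ExprNodeDesc expr = new ExprNodeColumnDesc(col, true);
            colList.add(expr);
            columnNames.add(col.getInternalName());
            columnExprMap.put(col.getInternalName(), expr);
        }
        return columnExprMap;
    }
}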

Example 72 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genLateralViewPlan.

private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) throws SemanticException {
    RowResolver lvForwardRR = new RowResolver();
    RowResolver source = opParseCtx.get(op).getRowResolver();
    Map<String, ExprNodeDesc> lvfColExprMap = new HashMap<String, ExprNodeDesc>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    List<String> colNames = new ArrayList<String>();
    for (ColumnInfo col : source.getColumnInfos()) {
        String[] tabCol = source.reverseLookup(col.getInternalName());
        lvForwardRR.put(tabCol[0], tabCol[1], col);
        ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(col);
        colList.add(colExpr);
        colNames.add(colExpr.getColumn());
        lvfColExprMap.put(col.getInternalName(), colExpr);
        selColExprMap.put(col.getInternalName(), colExpr.clone());
    }
    Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()), op), lvForwardRR);
    lvForward.setColumnExprMap(lvfColExprMap);
    // The order in which the two paths are added is important. The
    // lateral view join operator depends on having the select operator
    // give it the row first.
    // Get the all path by making a select(*).
    RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
    // Operator allPath = op;
    SelectDesc sDesc = new SelectDesc(colList, colNames, false);
    sDesc.setSelStarNoCompute(true);
    Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(sDesc, new RowSchema(allPathRR.getColumnInfos()), lvForward), allPathRR);
    allPath.setColumnExprMap(selColExprMap);
    int allColumns = allPathRR.getColumnInfos().size();
    // Get the UDTF Path
    QB blankQb = new QB(null, null, false);
    Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree.getChild(0), blankQb, lvForward, null, lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
    // add udtf aliases to QB
    for (String udtfAlias : blankQb.getAliases()) {
        qb.addAlias(udtfAlias);
    }
    RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();
    // Merge the two into the lateral view join
    // The cols of the merged result will be the combination of both the
    // cols of the UDTF path and the cols of the all path. The internal
    // names have to be changed to avoid conflicts
    RowResolver lateralViewRR = new RowResolver();
    List<String> outputInternalColNames = new ArrayList<String>();
    // For PPD, we need a column to expression map so that during the walk,
    // the processor knows how to transform the internal col names.
    // The following steps are dependent on the fact that we called
    // LVmergeRowResolvers in the above order.
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    LVmergeRowResolvers(allPathRR, lateralViewRR, colExprMap, outputInternalColNames);
    LVmergeRowResolvers(udtfPathRR, lateralViewRR, colExprMap, outputInternalColNames);
    Operator lateralViewJoin = putOpInsertMap(OperatorFactory.getAndMakeChild(new LateralViewJoinDesc(allColumns, outputInternalColNames), new RowSchema(lateralViewRR.getColumnInfos()), allPath, udtfPath), lateralViewRR);
    lateralViewJoin.setColumnExprMap(colExprMap);
    return lateralViewJoin;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) LateralViewJoinDesc(org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) LateralViewForwardDesc(org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
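
One detail worth noting in genLateralViewPlan: the forward operator and the SELECT(*) path receive separate expression objects for the same column (colExpr goes into lvfColExprMap, colExpr.clone() into selColExprMap), because each operator owns its own column expression map. A minimal sketch of that split, assuming the Hive classes listed above are on the classpath; the class and method names are hypothetical.

import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

// Hypothetical helper: registers one expression for the lateral-view-forward
// operator and a cloned copy for the select operator, mirroring the loop above.
final class PerOperatorExprMaps {
    static void register(ColumnInfo col,
                         Map<String, ExprNodeDesc> lvfColExprMap,
                         Map<String, ExprNodeDesc> selColExprMap) {
        ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(col);
        lvfColExprMap.put(col.getInternalName(), colExpr);
        selColExprMap.put(col.getInternalName(), colExpr.clone());
    }
}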

Example 73 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genColListRegex.

@SuppressWarnings("nls")
// TODO: make aliases unique, otherwise needless rewriting takes place
Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, List<Pair<ColumnInfo, RowResolver>> colList, Set<ColumnInfo> excludeCols, RowResolver input, RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases, boolean ensureUniqueCols) throws SemanticException {
    if (colSrcRR == null) {
        colSrcRR = input;
    }
    // The table alias should exist
    if (tabAlias != null && !colSrcRR.hasTableAlias(tabAlias)) {
        throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(), sel));
    }
    // TODO: Have to put in the support for AS clause
    Pattern regex = null;
    try {
        regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
    } catch (PatternSyntaxException e) {
        throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), sel, e.getMessage()));
    }
    StringBuilder replacementText = new StringBuilder();
    int matched = 0;
    // Also consider ColumnInfos registered under the empty table alias "".
    if (!aliases.contains("")) {
        aliases.add("");
    }
    /*
     * Track the input ColumnInfos that are added to the output.
     * If a ColumnInfo has multiple mappings, add the column only once,
     * but carry the mappings forward.
     */
    Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
    if (colSrcRR.getNamedJoinInfo() != null) {
        // We got a USING() clause in the previous join, so we need to generate the
        // select list per the SQL standard: for *, the joining columns come first
        // (non-repeated), followed by the other columns.
        Map<String, ColumnInfo> leftMap = colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(0));
        Map<String, ColumnInfo> rightMap = colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(1));
        Map<String, ColumnInfo> chosenMap = null;
        if (colSrcRR.getNamedJoinInfo().getHiveJoinType() != JoinType.RIGHTOUTER) {
            chosenMap = leftMap;
        } else {
            chosenMap = rightMap;
        }
        // First, get the columns that appear in the named join columns.
        for (String columnName : colSrcRR.getNamedJoinInfo().getNamedColumns()) {
            for (Map.Entry<String, ColumnInfo> entry : chosenMap.entrySet()) {
                ColumnInfo colInfo = entry.getValue();
                if (!columnName.equals(colInfo.getAlias())) {
                    continue;
                }
                String name = colInfo.getInternalName();
                String[] tmp = colSrcRR.reverseLookup(name);
                // Skip the colinfos which are not for this particular alias
                if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
                    continue;
                }
                if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
                    continue;
                }
                ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
                if (oColInfo == null) {
                    colList.add(Pair.of(colInfo, colSrcRR));
                    oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
                    inputColsProcessed.put(colInfo, oColInfo);
                }
                if (ensureUniqueCols) {
                    if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
                        throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1] + " => " + oColInfo + " due to duplication, see previous warnings", UnsupportedFeature.Duplicates_in_RR);
                    }
                } else {
                    output.put(tmp[0], tmp[1], oColInfo);
                }
                pos++;
                matched++;
                if (unparseTranslator.isEnabled() || (tableMask.isEnabled() && analyzeRewrite == null)) {
                    if (replacementText.length() > 0) {
                        replacementText.append(", ");
                    }
                    replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
                    replacementText.append(".");
                    replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
                }
            }
        }
    }
    for (String alias : aliases) {
        Map<String, ColumnInfo> fMap = colSrcRR.getFieldMap(alias);
        if (fMap == null) {
            continue;
        }
        // Iterate over the ColumnInfos from the input schema.
        for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
            ColumnInfo colInfo = entry.getValue();
            if (colSrcRR.getNamedJoinInfo() != null && colSrcRR.getNamedJoinInfo().getNamedColumns().contains(colInfo.getAlias())) {
                // we already added this column in select list.
                continue;
            }
            if (excludeCols != null && excludeCols.contains(colInfo)) {
                // This was added during plan generation.
                continue;
            }
            // First, look up the column from the source against which * is to be
            // resolved.
            // We'd later translate this into the column from the proper input, if
            // it's valid.
            // TODO: excludeCols may be possible to remove using the same
            // technique.
            String name = colInfo.getInternalName();
            String[] tmp = colSrcRR.reverseLookup(name);
            // Skip the colinfos which are not for this particular alias
            if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
                continue;
            }
            if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
                continue;
            }
            // Not matching the regex?
            if (!regex.matcher(tmp[1]).matches()) {
                continue;
            }
            // TODO: This is fraught with peril.
            if (input != colSrcRR) {
                colInfo = input.get(tabAlias, tmp[1]);
                if (colInfo == null) {
                    LOG.error("Cannot find colInfo for {}.{}, derived from [{}], in [{}]", tabAlias, tmp[1], colSrcRR, input);
                    throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY, tmp[1]);
                }
                // Capture the old mapping before switching to the proper input's column.
                String oldCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
                name = colInfo.getInternalName();
                tmp = input.reverseLookup(name);
                if (LOG.isDebugEnabled()) {
                    String newCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
                    LOG.debug("Translated [" + oldCol + "] to [" + newCol + "]");
                }
            }
            ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
            if (oColInfo == null) {
                colList.add(Pair.of(colInfo, input));
                oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
                inputColsProcessed.put(colInfo, oColInfo);
            }
            assert nonNull(tmp);
            if (ensureUniqueCols) {
                if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
                    throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1] + " => " + oColInfo + " due to duplication, see previous warnings", UnsupportedFeature.Duplicates_in_RR);
                }
            } else {
                output.put(tmp[0], tmp[1], oColInfo);
            }
            pos++;
            matched++;
            if (unparseTranslator.isEnabled() || tableMask.isEnabled()) {
                if (replacementText.length() > 0) {
                    replacementText.append(", ");
                }
                replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
                replacementText.append(".");
                replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
            }
        }
    }
    if (matched == 0) {
        throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), sel));
    }
    unparseTranslator.addTranslation(sel, replacementText.toString());
    if (tableMask.isEnabled()) {
        tableMask.addTranslation(sel, replacementText.toString());
    }
    return pos;
}
Also used : Pattern(java.util.regex.Pattern) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) PatternSyntaxException(java.util.regex.PatternSyntaxException)
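
The core of genColListRegex is plain java.util.regex matching: the column pattern from the query is compiled case-insensitively, tested against each candidate column alias, and zero matches is an error. A standalone sketch of just that matching step; the class name, column aliases, and pattern below are made up for illustration. The real method additionally skips hidden virtual columns and rewrites matched columns to fresh internal names, which the sketch omits.

import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public final class ColumnRegexSketch {
    public static void main(String[] args) {
        // Hypothetical aliases standing in for a RowResolver's field map.
        List<String> aliases = List.of("key", "value", "ds", "BLOCK__OFFSET__INSIDE__FILE");
        String colRegex = "(key|value)"; // e.g. from SELECT `(key|value)` FROM t
        Pattern regex;
        try {
            regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
        } catch (PatternSyntaxException e) {
            throw new IllegalArgumentException("Invalid column regex: " + colRegex, e);
        }
        int matched = 0;
        for (String alias : aliases) {
            if (regex.matcher(alias).matches()) {
                matched++;
                System.out.println("matched column: " + alias);
            }
        }
        if (matched == 0) {
            throw new IllegalStateException("No columns matched " + colRegex);
        }
    }
}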

Example 74 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method setupStats.

private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, RowResolver rwsch) throws SemanticException {
    // If this is not an ANALYZE command and not a column stats rewrite, do not gather stats.
    if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) {
        tsDesc.setGatherStats(false);
        return;
    }
    if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
        String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString();
        LOG.debug("Set stats collection dir : " + statsTmpLoc);
        tsDesc.setTmpStatsDir(statsTmpLoc);
    }
    tsDesc.setGatherStats(true);
    tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    // append additional virtual columns for storing statistics
    Iterator<VirtualColumn> vcs = VirtualColumn.getStatsRegistry(conf).iterator();
    List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
    while (vcs.hasNext()) {
        VirtualColumn vc = vcs.next();
        rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
        vcList.add(vc);
    }
    tsDesc.addVirtualCols(vcList);
    String tblName = tab.getTableName();
    // Theoretically the key prefix could be any unique string shared
    // between TableScanOperator (when publishing) and StatsTask (when aggregating).
    // Here we use
    //   db_name.table_name + partitionSpec
    // as the prefix for ease of reading during explain and debugging.
    // Currently, the partition spec can only be a static partition.
    String k = FileUtils.escapePathName(tblName).toLowerCase() + Path.SEPARATOR;
    tsDesc.setStatsAggPrefix(FileUtils.escapePathName(tab.getDbName()).toLowerCase() + "." + k);
    // set up WriteEntity for replication and txn stats
    WriteEntity we = new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED);
    we.setTxnAnalyze(true);
    outputs.add(we);
    if (AcidUtils.isTransactionalTable(tab)) {
        if (acidAnalyzeTable != null) {
            throw new IllegalStateException("Multiple ACID tables in analyze: " + we + ", " + acidAnalyzeTable);
        }
        acidAnalyzeTable = we;
    }
    // add WriteEntity for each matching partition
    if (tab.isPartitioned()) {
        List<String> cols = new ArrayList<String>();
        if (qbp.getAnalyzeRewrite() != null) {
            List<FieldSchema> partitionCols = tab.getPartCols();
            for (FieldSchema fs : partitionCols) {
                cols.add(fs.getName());
            }
            tsDesc.setPartColumns(cols);
            return;
        }
        TableSpec tblSpec = qbp.getTableSpec(alias);
        Map<String, String> partSpec = tblSpec.getPartSpec();
        if (partSpec != null) {
            cols.addAll(partSpec.keySet());
            tsDesc.setPartColumns(cols);
        } else {
            throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
        }
        List<Partition> partitions = qbp.getTableSpec().partitions;
        if (partitions != null) {
            for (Partition partn : partitions) {
                WriteEntity pwe = new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK);
                pwe.setTxnAnalyze(true);
                outputs.add(pwe);
            }
        }
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
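
The stats aggregation prefix built near the end of setupStats is simply the escaped, lower-cased db_name.table_name followed by a path separator. A standalone sketch of that construction, assuming FileUtils from hive-common and Hadoop's Path are on the classpath; the class name and the db/table values are made up.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;

public final class StatsPrefixSketch {
    public static void main(String[] args) {
        // Hypothetical values standing in for tab.getDbName() / tab.getTableName().
        String dbName = "Default";
        String tblName = "Web_Logs";
        // Mirrors tsDesc.setStatsAggPrefix(...) in the snippet above.
        String k = FileUtils.escapePathName(tblName).toLowerCase() + Path.SEPARATOR;
        String statsAggPrefix = FileUtils.escapePathName(dbName).toLowerCase() + "." + k;
        System.out.println(statsAggPrefix); // prints: default.web_logs/
    }
}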

Example 75 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genMaterializedViewDataOrgPlan.

private Operator genMaterializedViewDataOrgPlan(List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos, RowResolver inputRR, Operator input) {
    // In this case, we will introduce an RS and, immediately after it, a SEL that
    // restores the row schema to what follow-up operations are expecting.
    Set<String> keys = sortColInfos.stream().map(ColumnInfo::getInternalName).collect(Collectors.toSet());
    Set<String> distributeKeys = distributeColInfos.stream().map(ColumnInfo::getInternalName).collect(Collectors.toSet());
    List<ExprNodeDesc> keyCols = new ArrayList<>();
    List<String> keyColNames = new ArrayList<>();
    StringBuilder order = new StringBuilder();
    StringBuilder nullOrder = new StringBuilder();
    List<ExprNodeDesc> valCols = new ArrayList<>();
    List<String> valColNames = new ArrayList<>();
    List<ExprNodeDesc> partCols = new ArrayList<>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
    Map<String, String> nameMapping = new HashMap<>();
    // map _col0 to KEY._col0, etc
    for (ColumnInfo ci : inputRR.getRowSchema().getSignature()) {
        ExprNodeColumnDesc e = new ExprNodeColumnDesc(ci);
        String columnName = ci.getInternalName();
        if (keys.contains(columnName)) {
            // key (sort column)
            keyColNames.add(columnName);
            keyCols.add(e);
            colExprMap.put(Utilities.ReduceField.KEY + "." + columnName, e);
            nameMapping.put(columnName, Utilities.ReduceField.KEY + "." + columnName);
            order.append("+");
            nullOrder.append("a");
        } else {
            // value
            valColNames.add(columnName);
            valCols.add(e);
            colExprMap.put(Utilities.ReduceField.VALUE + "." + columnName, e);
            nameMapping.put(columnName, Utilities.ReduceField.VALUE + "." + columnName);
        }
        if (distributeKeys.contains(columnName)) {
            // distribute column
            partCols.add(e.clone());
        }
    }
    // Create the Key/Value TableDescs. When the operator plan is split into MR tasks,
    // the reduce operator will initialize the Extract operator with information
    // from the Key and Value TableDescs.
    List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(keyCols, keyColNames, 0, "");
    TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, order.toString(), nullOrder.toString());
    List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(valCols, valColNames, 0, "");
    TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
    List<List<Integer>> distinctColumnIndices = new ArrayList<>();
    // Number of reducers is set to default (-1)
    ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols, keyColNames, distinctColumnIndices, valColNames, -1, partCols, -1, keyTable, valueTable, Operation.NOT_ACID);
    RowResolver rsRR = new RowResolver();
    List<ColumnInfo> rsSignature = new ArrayList<>();
    for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
        ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        colInfo.setInternalName(nameMapping.get(colInfo.getInternalName()));
        rsSignature.add(colInfo);
        rsRR.put(nm[0], nm[1], colInfo);
        if (nm2 != null) {
            rsRR.addMappingOnly(nm2[0], nm2[1], colInfo);
        }
    }
    Operator<?> result = putOpInsertMap(OperatorFactory.getAndMakeChild(rsConf, new RowSchema(rsSignature), input), rsRR);
    result.setColumnExprMap(colExprMap);
    // Create SEL operator
    RowResolver selRR = new RowResolver();
    List<ColumnInfo> selSignature = new ArrayList<>();
    List<ExprNodeDesc> columnExprs = new ArrayList<>();
    List<String> colNames = new ArrayList<>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<>();
    for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
        ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        selSignature.add(colInfo);
        selRR.put(nm[0], nm[1], colInfo);
        if (nm2 != null) {
            selRR.addMappingOnly(nm2[0], nm2[1], colInfo);
        }
        String colName = colInfo.getInternalName();
        ExprNodeDesc exprNodeDesc;
        if (keys.contains(colName)) {
            exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.KEY.toString() + "." + colName, null, false);
            columnExprs.add(exprNodeDesc);
        } else {
            exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.VALUE.toString() + "." + colName, null, false);
            columnExprs.add(exprNodeDesc);
        }
        colNames.add(colName);
        selColExprMap.put(colName, exprNodeDesc);
    }
    SelectDesc selConf = new SelectDesc(columnExprs, colNames);
    result = putOpInsertMap(OperatorFactory.getAndMakeChild(selConf, new RowSchema(selSignature), result), selRR);
    result.setColumnExprMap(selColExprMap);
    return result;
}
Also used : LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) PreInsertTableDesc(org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
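
The convention that ties the RS and the trailing SEL together is the KEY./VALUE. prefix: sort columns are read back as KEY.<internalName>, everything else as VALUE.<internalName>. A small sketch of that renaming decision, assuming the Hive classes listed above are on the classpath; the helper class and method are illustrative only.

import java.util.Set;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;

// Illustrative helper: rebuilds the column reference the SEL operator uses to read
// a ReduceSink output column, mirroring the keys.contains(colName) branch above.
final class ReduceFieldNamingSketch {
    static ExprNodeColumnDesc reduceSinkOutputRef(ColumnInfo colInfo, Set<String> sortKeys) {
        String colName = colInfo.getInternalName();
        String prefix = sortKeys.contains(colName)
                ? Utilities.ReduceField.KEY.toString()
                : Utilities.ReduceField.VALUE.toString();
        return new ExprNodeColumnDesc(colInfo.getType(), prefix + "." + colName, null, false);
    }
}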

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 225
ArrayList (java.util.ArrayList): 140
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 138
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 100
HashMap (java.util.HashMap): 93
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 86
LinkedHashMap (java.util.LinkedHashMap): 71
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 59
Operator (org.apache.hadoop.hive.ql.exec.Operator): 48
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 47
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 47
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 45
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 45
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 45
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 45
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 45
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 45
Map (java.util.Map): 41
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 39
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException): 38