Examples with ExprNodeDesc - org.apache.hadoop.hive.ql.plan.ExprNodeDesc

Example 61 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ProjectionPusher method pushProjectionsAndFilters.

private void pushProjectionsAndFilters(final JobConf jobConf, final String splitPath, final String splitPathWithNoSchema) {
    if (mapWork == null) {
        return;
    } else if (mapWork.getPathToAliases() == null) {
        return;
    }
    final Set<String> aliases = new HashSet<String>();
    final Iterator<Entry<Path, ArrayList<String>>> iterator = mapWork.getPathToAliases().entrySet().iterator();
    while (iterator.hasNext()) {
        final Entry<Path, ArrayList<String>> entry = iterator.next();
        final String key = entry.getKey().toUri().getPath();
        if (splitPath.equals(key) || splitPathWithNoSchema.equals(key)) {
            aliases.addAll(entry.getValue());
        }
    }
    // Collect the needed columns from all the aliases and create ORed filter
    // expression for the table.
    boolean allColumnsNeeded = false;
    boolean noFilters = false;
    Set<Integer> neededColumnIDs = new HashSet<Integer>();
    // To support nested column pruning, we need to track the path from the top to the nested
    // fields
    Set<String> neededNestedColumnPaths = new HashSet<String>();
    List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
    RowSchema rowSchema = null;
    for (String alias : aliases) {
        final Operator<? extends Serializable> op = mapWork.getAliasToWork().get(alias);
        if (op != null && op instanceof TableScanOperator) {
            final TableScanOperator ts = (TableScanOperator) op;
            if (ts.getNeededColumnIDs() == null) {
                allColumnsNeeded = true;
            } else {
                neededColumnIDs.addAll(ts.getNeededColumnIDs());
                neededNestedColumnPaths.addAll(ts.getNeededNestedColumnPaths());
            }
            rowSchema = ts.getSchema();
            ExprNodeGenericFuncDesc filterExpr = ts.getConf() == null ? null : ts.getConf().getFilterExpr();
            // No filter if any TS has no filter expression
            noFilters = filterExpr == null;
            filterExprs.add(filterExpr);
        }
    }
    ExprNodeGenericFuncDesc tableFilterExpr = null;
    if (!noFilters) {
        try {
            for (ExprNodeGenericFuncDesc filterExpr : filterExprs) {
                if (tableFilterExpr == null) {
                    tableFilterExpr = filterExpr;
                } else {
                    tableFilterExpr = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(), Arrays.<ExprNodeDesc>asList(tableFilterExpr, filterExpr));
                }
            }
        } catch (UDFArgumentException ex) {
            LOG.debug("Turn off filtering due to " + ex);
            tableFilterExpr = null;
        }
    }
    // push down projections
    if (!allColumnsNeeded) {
        if (!neededColumnIDs.isEmpty()) {
            ColumnProjectionUtils.appendReadColumns(jobConf, new ArrayList<Integer>(neededColumnIDs));
            ColumnProjectionUtils.appendNestedColumnPaths(jobConf, new ArrayList<String>(neededNestedColumnPaths));
        }
    } else {
        ColumnProjectionUtils.setReadAllColumns(jobConf);
    }
    pushFilters(jobConf, rowSchema, tableFilterExpr);
}

Also used : Path(org.apache.hadoop.fs.Path) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) Entry(java.util.Map.Entry) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) HashSet(java.util.HashSet)

Example 62 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class GenMapRedUtils method setMapWork.

/**
   * initialize MapWork
   *
   * @param alias_id
   *          current alias
   * @param topOp
   *          the top operator of the stack
   * @param plan
   *          map work to initialize
   * @param local
   *          whether you need to add to map-reduce or local work
   * @param pList
   *          pruned partition list. If it is null it will be computed on-the-fly.
   * @param inputs
   *          read entities for the map work
   * @param conf
   *          current instance of hive conf
   */
public static void setMapWork(MapWork plan, ParseContext parseCtx, Set<ReadEntity> inputs, PrunedPartitionList partsList, TableScanOperator tsOp, String alias_id, HiveConf conf, boolean local) throws SemanticException {
    ArrayList<Path> partDir = new ArrayList<Path>();
    ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    boolean isAcidTable = false;
    Path tblDir = null;
    plan.setNameToSplitSample(parseCtx.getNameToSplitSample());
    if (partsList == null) {
        try {
            partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id);
            isAcidTable = tsOp.getConf().isAcidTable();
        } catch (SemanticException e) {
            throw e;
        }
    }
    // Generate the map work for this alias_id
    // pass both confirmed and unknown partitions through the map-reduce
    // framework
    Set<Partition> parts = partsList.getPartitions();
    PartitionDesc aliasPartnDesc = null;
    try {
        if (!parts.isEmpty()) {
            aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
        }
    } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    // The table does not have any partitions
    if (aliasPartnDesc == null) {
        aliasPartnDesc = new PartitionDesc(Utilities.getTableDesc(tsOp.getConf().getTableMetadata()), null);
    }
    Map<String, String> props = tsOp.getConf().getOpProps();
    if (props != null) {
        Properties target = aliasPartnDesc.getProperties();
        target.putAll(props);
    }
    plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);
    long sizeNeeded = Integer.MAX_VALUE;
    int fileLimit = -1;
    if (parseCtx.getGlobalLimitCtx().isEnable()) {
        if (isAcidTable) {
            LOG.info("Skip Global Limit optimization for ACID table");
            parseCtx.getGlobalLimitCtx().disableOpt();
        } else {
            long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
            sizeNeeded = (parseCtx.getGlobalLimitCtx().getGlobalOffset() + parseCtx.getGlobalLimitCtx().getGlobalLimit()) * sizePerRow;
            // for the optimization that reduce number of input file, we limit number
            // of files allowed. If more than specific number of files have to be
            // selected, we skip this optimization. Since having too many files as
            // inputs can cause unpredictable latency. It's not necessarily to be
            // cheaper.
            fileLimit = HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
            if (sizePerRow <= 0 || fileLimit <= 0) {
                LOG.info("Skip optimization to reduce input size of 'limit'");
                parseCtx.getGlobalLimitCtx().disableOpt();
            } else if (parts.isEmpty()) {
                LOG.info("Empty input: skip limit optimization");
            } else {
                LOG.info("Try to reduce input size for 'limit' " + "sizeNeeded: " + sizeNeeded + "  file limit : " + fileLimit);
            }
        }
    }
    boolean isFirstPart = true;
    boolean emptyInput = true;
    boolean singlePartition = (parts.size() == 1);
    // Track the dependencies for the view. Consider a query like: select * from V;
    // where V is a view of the form: select * from T
    // The dependencies should include V at depth 0, and T at depth 1 (inferred).
    Map<String, ReadEntity> viewToInput = parseCtx.getViewAliasToInput();
    ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(alias_id, viewToInput);
    // The table should also be considered a part of inputs, even if the table is a
    // partitioned table and whether any partition is selected or not
    //This read entity is a direct read entity and not an indirect read (that is when
    // this is being read because it is a dependency of a view).
    boolean isDirectRead = (parentViewInfo == null);
    TableDesc tblDesc = null;
    boolean initTableDesc = false;
    PlanUtils.addPartitionInputs(parts, inputs, parentViewInfo, isDirectRead);
    for (Partition part : parts) {
        // Later the properties have to come from the partition as opposed
        // to from the table in order to support versioning.
        Path[] paths = null;
        SampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(tsOp);
        // Lookup list bucketing pruner
        Map<String, ExprNodeDesc> partToPruner = parseCtx.getOpToPartToSkewedPruner().get(tsOp);
        ExprNodeDesc listBucketingPruner = (partToPruner != null) ? partToPruner.get(part.getName()) : null;
        if (sampleDescr != null) {
            assert (listBucketingPruner == null) : "Sampling and list bucketing can't coexit.";
            paths = SamplePruner.prune(part, sampleDescr);
            parseCtx.getGlobalLimitCtx().disableOpt();
        } else if (listBucketingPruner != null) {
            assert (sampleDescr == null) : "Sampling and list bucketing can't coexist.";
            /* Use list bucketing prunner's path. */
            paths = ListBucketingPruner.prune(parseCtx, part, listBucketingPruner);
        } else {
            // contain enough size, we change to normal mode.
            if (parseCtx.getGlobalLimitCtx().isEnable()) {
                if (isFirstPart) {
                    long sizeLeft = sizeNeeded;
                    ArrayList<Path> retPathList = new ArrayList<Path>();
                    SamplePruner.LimitPruneRetStatus status = SamplePruner.limitPrune(part, sizeLeft, fileLimit, retPathList);
                    if (status.equals(SamplePruner.LimitPruneRetStatus.NoFile)) {
                        continue;
                    } else if (status.equals(SamplePruner.LimitPruneRetStatus.NotQualify)) {
                        LOG.info("Use full input -- first " + fileLimit + " files are more than " + sizeNeeded + " bytes");
                        parseCtx.getGlobalLimitCtx().disableOpt();
                    } else {
                        emptyInput = false;
                        paths = new Path[retPathList.size()];
                        int index = 0;
                        for (Path path : retPathList) {
                            paths[index++] = path;
                        }
                        if (status.equals(SamplePruner.LimitPruneRetStatus.NeedAllFiles) && singlePartition) {
                            // if all files are needed to meet the size limit, we disable
                            // optimization. It usually happens for empty table/partition or
                            // table/partition with only one file. By disabling this
                            // optimization, we can avoid retrying the query if there is
                            // not sufficient rows.
                            parseCtx.getGlobalLimitCtx().disableOpt();
                        }
                    }
                    isFirstPart = false;
                } else {
                    paths = new Path[0];
                }
            }
            if (!parseCtx.getGlobalLimitCtx().isEnable()) {
                paths = part.getPath();
            }
        }
        // is it a partitioned table ?
        if (!part.getTable().isPartitioned()) {
            assert (tblDir == null);
            tblDir = paths[0];
            if (!initTableDesc) {
                tblDesc = Utilities.getTableDesc(part.getTable());
                initTableDesc = true;
            }
        } else if (tblDesc == null) {
            if (!initTableDesc) {
                tblDesc = Utilities.getTableDesc(part.getTable());
                initTableDesc = true;
            }
        }
        if (props != null) {
            Properties target = tblDesc.getProperties();
            target.putAll(props);
        }
        for (Path p : paths) {
            if (p == null) {
                continue;
            }
            String path = p.toString();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding " + path + " of table" + alias_id);
            }
            partDir.add(p);
            try {
                if (part.getTable().isPartitioned()) {
                    partDesc.add(Utilities.getPartitionDesc(part));
                } else {
                    partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part, false));
                }
            } catch (HiveException e) {
                LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
                throw new SemanticException(e.getMessage(), e);
            }
        }
    }
    if (emptyInput) {
        parseCtx.getGlobalLimitCtx().disableOpt();
    }
    Utilities.addSchemaEvolutionToTableScanOperator(partsList.getSourceTable(), tsOp);
    Iterator<Path> iterPath = partDir.iterator();
    Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();
    if (!local) {
        while (iterPath.hasNext()) {
            assert iterPartnDesc.hasNext();
            Path path = iterPath.next();
            PartitionDesc prtDesc = iterPartnDesc.next();
            // Add the path to alias mapping
            plan.addPathToAlias(path, alias_id);
            plan.addPathToPartitionInfo(path, prtDesc);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Information added for path " + path);
            }
        }
        assert plan.getAliasToWork().get(alias_id) == null;
        plan.getAliasToWork().put(alias_id, tsOp);
    } else {
        // populate local work if needed
        MapredLocalWork localPlan = plan.getMapRedLocalWork();
        if (localPlan == null) {
            localPlan = new MapredLocalWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
        }
        assert localPlan.getAliasToWork().get(alias_id) == null;
        assert localPlan.getAliasToFetchWork().get(alias_id) == null;
        localPlan.getAliasToWork().put(alias_id, tsOp);
        if (tblDir == null) {
            tblDesc = Utilities.getTableDesc(partsList.getSourceTable());
            localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(partDir, partDesc, tblDesc));
        } else {
            localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(tblDir, tblDesc));
        }
        plan.setMapRedLocalWork(localPlan);
    }
}

Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) SampleDesc(org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)

Example 63 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class MapJoinProcessor method genSelectPlan.

protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
    List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
    input.setChildOperators(null);
    // create a dummy select - This select is needed by the walker to split the
    // mapJoin later on
    RowSchema inputRS = input.getSchema();
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputs = new ArrayList<String>();
    List<String> outputCols = input.getConf().getOutputColumnNames();
    ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < outputCols.size(); i++) {
        String internalName = outputCols.get(i);
        ColumnInfo valueInfo = inputRS.getColumnInfo(internalName);
        ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
        exprs.add(colDesc);
        outputs.add(internalName);
        ColumnInfo newCol = new ColumnInfo(internalName, valueInfo.getType(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol());
        newCol.setAlias(valueInfo.getAlias());
        outputRS.add(newCol);
        colExprMap.put(internalName, colDesc);
    }
    SelectDesc select = new SelectDesc(exprs, outputs, false);
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), input);
    sel.setColumnExprMap(colExprMap);
    // Insert the select operator in between.
    sel.setChildOperators(childOps);
    for (Operator<? extends OperatorDesc> ch : childOps) {
        ch.replaceParent(input, sel);
    }
}

Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 64 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class MapJoinProcessor method convertSMBJoinToMapJoin.

/**
   * convert a sortmerge join to a a map-side join.
   *
   * @param opParseCtxMap
   * @param smbJoinOp
   *          join operator
   * @param joinTree
   *          qb join tree
   * @param bigTablePos
   *          position of the source to be read as part of map-reduce framework. All other sources
   *          are cached in memory
   * @param noCheckOuterJoin
   */
public static MapJoinOperator convertSMBJoinToMapJoin(HiveConf hconf, SMBMapJoinOperator smbJoinOp, int bigTablePos, boolean noCheckOuterJoin) throws SemanticException {
    // Create a new map join operator
    SMBJoinDesc smbJoinDesc = smbJoinOp.getConf();
    List<ExprNodeDesc> keyCols = smbJoinDesc.getKeys().get(Byte.valueOf((byte) 0));
    TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
    MapJoinDesc mapJoinDesc = new MapJoinDesc(smbJoinDesc.getKeys(), keyTableDesc, smbJoinDesc.getExprs(), smbJoinDesc.getValueTblDescs(), smbJoinDesc.getValueTblDescs(), smbJoinDesc.getOutputColumnNames(), bigTablePos, smbJoinDesc.getConds(), smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix());
    mapJoinDesc.setStatistics(smbJoinDesc.getStatistics());
    RowSchema joinRS = smbJoinOp.getSchema();
    // The mapjoin has the same schema as the join operator
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(smbJoinOp.getCompilationOpContext(), mapJoinDesc, joinRS, new ArrayList<Operator<? extends OperatorDesc>>());
    // change the children of the original join operator to point to the map
    // join operator
    List<Operator<? extends OperatorDesc>> childOps = smbJoinOp.getChildOperators();
    for (Operator<? extends OperatorDesc> childOp : childOps) {
        childOp.replaceParent(smbJoinOp, mapJoinOp);
    }
    mapJoinOp.setChildOperators(childOps);
    smbJoinOp.setChildOperators(null);
    // change the parent of the original SMBjoin operator to point to the map
    // join operator
    List<Operator<? extends OperatorDesc>> parentOps = smbJoinOp.getParentOperators();
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
        parentOp.replaceChild(smbJoinOp, mapJoinOp);
    }
    mapJoinOp.setParentOperators(parentOps);
    smbJoinOp.setParentOperators(null);
    return mapJoinOp;
}

Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 65 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class MapJoinProcessor method getKeys.

public static ObjectPair<List<ReduceSinkOperator>, Map<Byte, List<ExprNodeDesc>>> getKeys(boolean leftInputJoin, String[] baseSrc, JoinOperator op) {
    // Walk over all the sources (which are guaranteed to be reduce sink
    // operators).
    // The join outputs a concatenation of all the inputs.
    List<ReduceSinkOperator> oldReduceSinkParentOps = new ArrayList<ReduceSinkOperator>(op.getNumParent());
    if (leftInputJoin) {
        // assert mapJoinPos == 0;
        Operator<? extends OperatorDesc> parentOp = op.getParentOperators().get(0);
        assert parentOp.getParentOperators().size() == 1;
        oldReduceSinkParentOps.add((ReduceSinkOperator) parentOp);
    }
    byte pos = 0;
    for (String src : baseSrc) {
        if (src != null) {
            Operator<? extends OperatorDesc> parentOp = op.getParentOperators().get(pos);
            assert parentOp.getParentOperators().size() == 1;
            oldReduceSinkParentOps.add((ReduceSinkOperator) parentOp);
        }
        pos++;
    }
    // get the join keys from old parent ReduceSink operators
    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    for (pos = 0; pos < op.getParentOperators().size(); pos++) {
        ReduceSinkOperator inputRS = oldReduceSinkParentOps.get(pos);
        List<ExprNodeDesc> keyCols = inputRS.getConf().getKeyCols();
        keyExprMap.put(pos, keyCols);
    }
    return new ObjectPair<List<ReduceSinkOperator>, Map<Byte, List<ExprNodeDesc>>>(oldReduceSinkParentOps, keyExprMap);
}

Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ObjectPair(org.apache.hadoop.hive.common.ObjectPair)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)321 ArrayList (java.util.ArrayList)179 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)146 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)110 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)101 Test (org.junit.Test)74 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)69 HashMap (java.util.HashMap)67 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)57 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)47 LinkedHashMap (java.util.LinkedHashMap)43 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)42 List (java.util.List)40 Operator (org.apache.hadoop.hive.ql.exec.Operator)39 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)35 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)34 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)34 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)34 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)33 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)32