Search in sources :

Example 11 with SMBMapJoinOperator

use of org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator in project hive by apache.

the class GenSparkUtils method annotateMapWork.

/**
 * Fill MapWork with 'local' work and bucket information for SMB Join.
 * @param context context, containing references to MapWorks and their SMB information.
 * @throws SemanticException
 */
public void annotateMapWork(GenSparkProcContext context) throws SemanticException {
    for (SMBMapJoinOperator smbMapJoinOp : context.smbMapJoinCtxMap.keySet()) {
        // initialize mapwork with smbMapJoin information.
        SparkSMBMapJoinInfo smbMapJoinInfo = context.smbMapJoinCtxMap.get(smbMapJoinOp);
        MapWork work = smbMapJoinInfo.mapWork;
        SparkSortMergeJoinFactory.annotateMapWork(context, work, smbMapJoinOp, (TableScanOperator) smbMapJoinInfo.bigTableRootOp, false);
        for (Operator<?> smallTableRootOp : smbMapJoinInfo.smallTableRootOps) {
            SparkSortMergeJoinFactory.annotateMapWork(context, work, smbMapJoinOp, (TableScanOperator) smallTableRootOp, true);
        }
    }
}
Also used : MapWork(org.apache.hadoop.hive.ql.plan.MapWork) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)

Example 12 with SMBMapJoinOperator

use of org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator in project hive by apache.

the class SemanticAnalyzer method analyzeInternal.

void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
    // 1. Generate Resolved Parse tree from syntax tree
    LOG.info("Starting Semantic Analysis");
    //change the location of position alias process here
    processPositionAlias(ast);
    if (!genResolvedParseTree(ast, plannerCtx)) {
        return;
    }
    // 2. Gen OP Tree from resolved Parse Tree
    Operator sinkOp = genOPTree(ast, plannerCtx);
    if (!unparseTranslator.isEnabled() && tableMask.isEnabled()) {
        // Here we rewrite the * and also the masking table
        ASTNode tree = rewriteASTWithMaskAndFilter(ast);
        if (tree != ast) {
            ctx.setSkipTableMasking(true);
            init(true);
            //change the location of position alias process here
            processPositionAlias(tree);
            genResolvedParseTree(tree, plannerCtx);
            if (this instanceof CalcitePlanner) {
                ((CalcitePlanner) this).resetCalciteConfiguration();
            }
            sinkOp = genOPTree(tree, plannerCtx);
        }
    }
    // 3. Deduce Resultset Schema
    if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
        resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
    } else {
        // succeeds.
        if (resultSchema == null) {
            resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
        }
    }
    // 4. Generate Parse Context for Optimizer & Physical compiler
    copyInfoToQueryProperties(queryProperties);
    ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
    // 5. Take care of view creation
    if (createVwDesc != null) {
        if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
            return;
        }
        if (!ctx.isCboSucceeded()) {
            saveViewDefinition();
        }
        // validate the create view statement at this point, the createVwDesc gets
        // all the information for semanticcheck
        validateCreateView();
        if (!createVwDesc.isMaterialized()) {
            // Since we're only creating a view (not executing it), we don't need to
            // optimize or translate the plan (and in fact, those procedures can
            // interfere with the view creation). So skip the rest of this method.
            ctx.setResDir(null);
            ctx.setResFile(null);
            try {
                PlanUtils.addInputsForView(pCtx);
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
            // Generate lineage info for create view statements
            // if LineageLogger hook is configured.
            // Add the transformation that computes the lineage information.
            Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
            if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
                ArrayList<Transform> transformations = new ArrayList<Transform>();
                transformations.add(new HiveOpConverterPostProc());
                transformations.add(new Generator());
                for (Transform t : transformations) {
                    pCtx = t.transform(pCtx);
                }
                // we just use view name as location.
                SessionState.get().getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
            }
            return;
        }
    }
    // 6. Generate table access stats if required
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
        TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
        setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
    }
    // 7. Perform Logical optimization
    if (LOG.isDebugEnabled()) {
        LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    Optimizer optm = new Optimizer();
    optm.setPctx(pCtx);
    optm.initialize(conf);
    pCtx = optm.optimize();
    if (pCtx.getColumnAccessInfo() != null) {
        // set ColumnAccessInfo for view column authorization
        setColumnAccessInfo(pCtx.getColumnAccessInfo());
    }
    FetchTask origFetchTask = pCtx.getFetchTask();
    if (LOG.isDebugEnabled()) {
        LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    // 8. Generate column access stats if required - wait until column pruning
    // takes place during optimization
    boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
    if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
        // view column access info is carried by this.getColumnAccessInfo().
        setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
    }
    // TEZ..)
    if (!ctx.getExplainLogical()) {
        TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
        compiler.init(queryState, console, db);
        compiler.compile(pCtx, rootTasks, inputs, outputs);
        fetchTask = pCtx.getFetchTask();
    }
    LOG.info("Completed plan generation");
    // 10. put accessed columns to readEntity
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
    }
    // 11. if desired check we're not going over partition scan limits
    if (!ctx.isExplainSkipExecution()) {
        enforceScanLimits(pCtx, origFetchTask);
    }
    return;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Optimizer(org.apache.hadoop.hive.ql.optimizer.Optimizer) ArrayList(java.util.ArrayList) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) HiveOpConverterPostProc(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc) Transform(org.apache.hadoop.hive.ql.optimizer.Transform) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Generator(org.apache.hadoop.hive.ql.optimizer.lineage.Generator)

Example 13 with SMBMapJoinOperator

use of org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator in project hive by apache.

the class SparkCompiler method generateTaskTreeHelper.

private void generateTaskTreeHelper(GenSparkProcContext procCtx, List<Node> topNodes) throws SemanticException {
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack. The dispatcher generates the plan from the operator tree
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    GenSparkWork genSparkWork = new GenSparkWork(GenSparkUtils.getUtils());
    opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genSparkWork);
    opRules.put(new RuleRegExp("Split Work - SparkPartitionPruningSink", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), genSparkWork);
    opRules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
    opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new SparkFileSinkProcessor(), genSparkWork));
    opRules.put(new RuleRegExp("Handle Analyze Command", TableScanOperator.getOperatorName() + "%"), new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()));
    opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new SemanticNodeProcessor() {

        @Override
        public Object process(Node n, Stack<Node> s, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
            GenSparkProcContext context = (GenSparkProcContext) procCtx;
            UnionOperator union = (UnionOperator) n;
            // simply need to remember that we've seen a union.
            context.currentUnionOperators.add(union);
            return null;
        }
    });
    /**
     *  SMB join case:   (Big)   (Small)  (Small)
     *                     TS       TS       TS
     *                      \       |       /
     *                       \      DS     DS
     *                         \   |    /
     *                         SMBJoinOP
     *
     * Some of the other processors are expecting only one traversal beyond SMBJoinOp.
     * We need to traverse from the big-table path only, and stop traversing on the
     * small-table path once we reach SMBJoinOp.
     * Also add some SMB join information to the context, so we can properly annotate
     * the MapWork later on.
     */
    opRules.put(new TypeRule(SMBMapJoinOperator.class), new SemanticNodeProcessor() {

        @Override
        public Object process(Node currNode, Stack<Node> stack, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
            GenSparkProcContext context = (GenSparkProcContext) procCtx;
            SMBMapJoinOperator currSmbNode = (SMBMapJoinOperator) currNode;
            SparkSMBMapJoinInfo smbMapJoinCtx = context.smbMapJoinCtxMap.get(currSmbNode);
            if (smbMapJoinCtx == null) {
                smbMapJoinCtx = new SparkSMBMapJoinInfo();
                context.smbMapJoinCtxMap.put(currSmbNode, smbMapJoinCtx);
            }
            for (Node stackNode : stack) {
                if (stackNode instanceof DummyStoreOperator) {
                    // If coming from small-table side, do some book-keeping, and skip traversal.
                    smbMapJoinCtx.smallTableRootOps.add(context.currentRootOperator);
                    return true;
                }
            }
            // If coming from big-table side, do some book-keeping, and continue traversal
            smbMapJoinCtx.bigTableRootOp = context.currentRootOperator;
            return false;
        }
    });
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    SemanticGraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
    ogw.startWalking(topNodes, null);
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) NodeProcessorCtx(org.apache.hadoop.hive.ql.lib.NodeProcessorCtx) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) CompositeProcessor(org.apache.hadoop.hive.ql.lib.CompositeProcessor) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) SparkReduceSinkMapJoinProc(org.apache.hadoop.hive.ql.optimizer.spark.SparkReduceSinkMapJoinProc) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)

Example 14 with SMBMapJoinOperator

use of org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator in project hive by apache.

the class GenSparkWork method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
    GenSparkProcContext context = (GenSparkProcContext) procContext;
    Preconditions.checkArgument(context != null, "AssertionError: expected context to be not null");
    Preconditions.checkArgument(context.currentTask != null, "AssertionError: expected context.currentTask to be not null");
    Preconditions.checkArgument(context.currentRootOperator != null, "AssertionError: expected context.currentRootOperator to be not null");
    // Operator is a file sink or reduce sink. Something that forces a new vertex.
    @SuppressWarnings("unchecked") Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) nd;
    // root is the start of the operator pipeline we're currently
    // packing into a vertex, typically a table scan, union or join
    Operator<?> root = context.currentRootOperator;
    LOG.debug("Root operator: " + root);
    LOG.debug("Leaf operator: " + operator);
    SparkWork sparkWork = context.currentTask.getWork();
    SMBMapJoinOperator smbOp = GenSparkUtils.getChildOperator(root, SMBMapJoinOperator.class);
    // Right now the work graph is pretty simple. If there is no
    // Preceding work we have a root and will generate a map
    // vertex. If there is a preceding work we will generate
    // a reduce vertex
    BaseWork work;
    if (context.rootToWorkMap.containsKey(root)) {
        // having seen the root operator before means there was a branch in the
        // operator graph. There's typically two reasons for that: a) mux/demux
        // b) multi insert. Mux/Demux will hit the same leaf again, multi insert
        // will result into a vertex with multiple FS or RS operators.
        // At this point we don't have to do anything special in this case. Just
        // run through the regular paces w/o creating a new task.
        work = context.rootToWorkMap.get(root);
    } else {
        // create a new vertex
        if (context.preceedingWork == null) {
            if (smbOp == null) {
                work = utils.createMapWork(context, root, sparkWork, null);
            } else {
                // save work to be initialized later with SMB information.
                work = utils.createMapWork(context, root, sparkWork, null, true);
                context.smbMapJoinCtxMap.get(smbOp).mapWork = (MapWork) work;
            }
        } else {
            work = utils.createReduceWork(context, root, sparkWork);
        }
        context.rootToWorkMap.put(root, work);
    }
    if (!context.childToWorkMap.containsKey(operator)) {
        List<BaseWork> workItems = new LinkedList<BaseWork>();
        workItems.add(work);
        context.childToWorkMap.put(operator, workItems);
    } else {
        context.childToWorkMap.get(operator).add(work);
    }
    // remember which mapjoin operator links with which work
    if (!context.currentMapJoinOperators.isEmpty()) {
        for (MapJoinOperator mj : context.currentMapJoinOperators) {
            LOG.debug("Processing map join: " + mj);
            // remember the mapping in case we scan another branch of the mapjoin later
            if (!context.mapJoinWorkMap.containsKey(mj)) {
                List<BaseWork> workItems = new LinkedList<BaseWork>();
                workItems.add(work);
                context.mapJoinWorkMap.put(mj, workItems);
            } else {
                context.mapJoinWorkMap.get(mj).add(work);
            }
            /*
         * this happens in case of map join operations.
         * The tree looks like this:
         *
         *        RS <--- we are here perhaps
         *        |
         *     MapJoin
         *     /     \
         *   RS       TS
         *  /
         * TS
         *
         * If we are at the RS pointed above, and we may have already visited the
         * RS following the TS, we have already generated work for the TS-RS.
         * We need to hook the current work to this generated work.
         */
            if (context.linkOpWithWorkMap.containsKey(mj)) {
                Map<BaseWork, SparkEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
                if (linkWorkMap != null) {
                    if (context.linkChildOpWithDummyOp.containsKey(mj)) {
                        for (Operator<?> dummy : context.linkChildOpWithDummyOp.get(mj)) {
                            work.addDummyOp((HashTableDummyOperator) dummy);
                        }
                    }
                    for (Entry<BaseWork, SparkEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
                        BaseWork parentWork = parentWorkMap.getKey();
                        LOG.debug("connecting " + parentWork.getName() + " with " + work.getName());
                        SparkEdgeProperty edgeProp = parentWorkMap.getValue();
                        sparkWork.connect(parentWork, work, edgeProp);
                        // of the downstream work
                        for (ReduceSinkOperator r : context.linkWorkWithReduceSinkMap.get(parentWork)) {
                            if (r.getConf().getOutputName() != null) {
                                LOG.debug("Cloning reduce sink for multi-child broadcast edge");
                                // we've already set this one up. Need to clone for the next work.
                                r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(r.getCompilationOpContext(), (ReduceSinkDesc) r.getConf().clone(), r.getParentOperators());
                            }
                            r.getConf().setOutputName(work.getName());
                        }
                    }
                }
            }
        }
        // clear out the set. we don't need it anymore.
        context.currentMapJoinOperators.clear();
    }
    // with this root operator.
    if (root.getNumParent() > 0) {
        Preconditions.checkArgument(work instanceof ReduceWork, "AssertionError: expected work to be a ReduceWork, but was " + work.getClass().getName());
        ReduceWork reduceWork = (ReduceWork) work;
        for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
            Preconditions.checkArgument(parent instanceof ReduceSinkOperator, "AssertionError: expected operator to be a ReduceSinkOperator, but was " + parent.getClass().getName());
            ReduceSinkOperator rsOp = (ReduceSinkOperator) parent;
            SparkEdgeProperty edgeProp = GenSparkUtils.getEdgeProperty(context.conf, rsOp, reduceWork);
            rsOp.getConf().setOutputName(reduceWork.getName());
            GenMapRedUtils.setKeyAndValueDesc(reduceWork, rsOp);
            context.leafOpToFollowingWorkInfo.put(rsOp, Pair.of(edgeProp, reduceWork));
            LOG.debug("Removing " + parent + " as parent from " + root);
            root.removeParent(parent);
        }
    }
    // the union operators from the operator tree later.
    if (!context.currentUnionOperators.isEmpty()) {
        context.currentUnionOperators.clear();
        context.workWithUnionOperators.add(work);
    }
    // reasons. Roots are data sources, leaves are data sinks. I know.
    if (context.leafOpToFollowingWorkInfo.containsKey(operator)) {
        Pair<SparkEdgeProperty, ReduceWork> childWorkInfo = context.leafOpToFollowingWorkInfo.get(operator);
        SparkEdgeProperty edgeProp = childWorkInfo.getLeft();
        ReduceWork childWork = childWorkInfo.getRight();
        LOG.debug("Second pass. Leaf operator: " + operator + " has common downstream work:" + childWork);
        // we don't want to connect them with the work associated with TS more than once.
        if (sparkWork.getEdgeProperty(work, childWork) == null) {
            sparkWork.connect(work, childWork, edgeProp);
        } else {
            LOG.debug("work " + work.getName() + " is already connected to " + childWork.getName() + " before");
        }
    } else {
        LOG.debug("First pass. Leaf operator: " + operator);
    }
    // the next item will be a new root.
    if (!operator.getChildOperators().isEmpty()) {
        Preconditions.checkArgument(operator.getChildOperators().size() == 1, "AssertionError: expected operator.getChildOperators().size() to be 1, but was " + operator.getChildOperators().size());
        context.parentOfRoot = operator;
        context.currentRootOperator = operator.getChildOperators().get(0);
        context.preceedingWork = work;
    }
    return null;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashTableDummyOperator(org.apache.hadoop.hive.ql.exec.HashTableDummyOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ArrayList(java.util.ArrayList) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) LinkedList(java.util.LinkedList) SparkEdgeProperty(org.apache.hadoop.hive.ql.plan.SparkEdgeProperty) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)

Example 15 with SMBMapJoinOperator

use of org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator in project hive by apache.

the class AbstractSMBJoinProc method convertJoinToSMBJoin.

// Convert the join operator to a sort-merge join operator
protected void convertJoinToSMBJoin(JoinOperator joinOp, SortBucketJoinProcCtx smbJoinContext) throws SemanticException {
    MapJoinOperator mapJoinOp = convertJoinToBucketMapJoin(joinOp, smbJoinContext);
    SMBMapJoinOperator smbMapJoinOp = convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext);
    smbMapJoinOp.setConvertedAutomaticallySMBJoin(true);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)

Aggregations

SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)19 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)13 Operator (org.apache.hadoop.hive.ql.exec.Operator)11 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)11 ArrayList (java.util.ArrayList)8 Path (org.apache.hadoop.fs.Path)8 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)8 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)8 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)8 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)7 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)7 LinkedHashMap (java.util.LinkedHashMap)4 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)4 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)4 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)4 Task (org.apache.hadoop.hive.ql.exec.Task)4 MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask)4 MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork)4 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)4 HashMap (java.util.HashMap)3