Examples with JoinOperator - org.apache.hadoop.hive.ql.exec.JoinOperator

Example 16 with JoinOperator

use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.

the class SortedMergeJoinProc method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    JoinOperator joinOp = (JoinOperator) nd;
    SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx) procCtx;
    boolean convert = canConvertJoinToSMBJoin(joinOp, smbJoinContext);
    if (convert) {
        convertJoinToSMBJoin(joinOp, smbJoinContext);
    }
    return null;
}

Also used : JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator)

Example 17 with JoinOperator

use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.

the class SparkSortMergeJoinOptimizer method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    JoinOperator joinOp = (JoinOperator) nd;
    HiveConf conf = ((OptimizeSparkProcContext) procCtx).getParseContext().getConf();
    if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN)) {
        return null;
    }
    SortBucketJoinProcCtx smbJoinContext = new SortBucketJoinProcCtx(conf);
    boolean convert = canConvertJoinToSMBJoin(joinOp, smbJoinContext, pGraphContext, stack);
    if (convert) {
        return convertJoinToSMBJoinAndReturn(joinOp, smbJoinContext);
    }
    return null;
}

Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SortBucketJoinProcCtx(org.apache.hadoop.hive.ql.optimizer.SortBucketJoinProcCtx) HiveConf(org.apache.hadoop.hive.conf.HiveConf)

Example 18 with JoinOperator

use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.

the class AbstractSMBJoinProc method canConvertJoinToBucketMapJoin.

// Can the join operator be converted to a bucket map-merge join operator ?
@SuppressWarnings("unchecked")
protected boolean canConvertJoinToBucketMapJoin(JoinOperator joinOp, SortBucketJoinProcCtx context) throws SemanticException {
    // This has already been inspected and rejected
    if (context.getRejectedJoinOps().contains(joinOp)) {
        return false;
    }
    if (!this.pGraphContext.getJoinOps().contains(joinOp)) {
        return false;
    }
    Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
    try {
        String selector = HiveConf.getVar(pGraphContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
        bigTableMatcherClass = JavaUtils.loadClass(selector);
    } catch (ClassNotFoundException e) {
        throw new SemanticException(e.getMessage());
    }
    BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
        // of any type. So return false.
        return false;
    }
    int bigTablePosition = bigTableMatcher.getBigTablePosition(pGraphContext, joinOp, joinCandidates);
    if (bigTablePosition < 0) {
        // contains aliases from sub-query
        return false;
    }
    context.setBigTablePosition(bigTablePosition);
    String joinAlias = bigTablePosition == 0 ? joinOp.getConf().getLeftAlias() : joinOp.getConf().getRightAliases()[bigTablePosition - 1];
    joinAlias = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinAlias);
    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
    // get the join keys from parent ReduceSink operators
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
        ReduceSinkDesc rsconf = ((ReduceSinkOperator) parentOp).getConf();
        Byte tag = (byte) rsconf.getTag();
        List<ExprNodeDesc> keys = rsconf.getKeyCols();
        keyExprMap.put(tag, keys);
    }
    context.setKeyExprMap(keyExprMap);
    // Make a deep copy of the aliases so that they are not changed in the context
    String[] joinSrcs = joinOp.getConf().getBaseSrc();
    String[] srcs = new String[joinSrcs.length];
    for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) {
        joinSrcs[srcPos] = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinSrcs[srcPos]);
        srcs[srcPos] = new String(joinSrcs[srcPos]);
    }
    // table matcher.
    return checkConvertBucketMapJoin(context, joinOp.getConf().getAliasToOpInfo(), keyExprMap, joinAlias, Arrays.asList(srcs));
}

Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) HashMap(java.util.HashMap) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 19 with JoinOperator

use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.

the class SemanticAnalyzer method analyzeInternal.

void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
    // 1. Generate Resolved Parse tree from syntax tree
    LOG.info("Starting Semantic Analysis");
    //change the location of position alias process here
    processPositionAlias(ast);
    if (!genResolvedParseTree(ast, plannerCtx)) {
        return;
    }
    // 2. Gen OP Tree from resolved Parse Tree
    Operator sinkOp = genOPTree(ast, plannerCtx);
    if (!unparseTranslator.isEnabled() && tableMask.isEnabled()) {
        // Here we rewrite the * and also the masking table
        ASTNode tree = rewriteASTWithMaskAndFilter(ast);
        if (tree != ast) {
            ctx.setSkipTableMasking(true);
            init(true);
            //change the location of position alias process here
            processPositionAlias(tree);
            genResolvedParseTree(tree, plannerCtx);
            if (this instanceof CalcitePlanner) {
                ((CalcitePlanner) this).resetCalciteConfiguration();
            }
            sinkOp = genOPTree(tree, plannerCtx);
        }
    }
    // 3. Deduce Resultset Schema
    if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
        resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
    } else {
        // succeeds.
        if (resultSchema == null) {
            resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
        }
    }
    // 4. Generate Parse Context for Optimizer & Physical compiler
    copyInfoToQueryProperties(queryProperties);
    ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
    // 5. Take care of view creation
    if (createVwDesc != null) {
        if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
            return;
        }
        if (!ctx.isCboSucceeded()) {
            saveViewDefinition();
        }
        // validate the create view statement at this point, the createVwDesc gets
        // all the information for semanticcheck
        validateCreateView();
        if (!createVwDesc.isMaterialized()) {
            // Since we're only creating a view (not executing it), we don't need to
            // optimize or translate the plan (and in fact, those procedures can
            // interfere with the view creation). So skip the rest of this method.
            ctx.setResDir(null);
            ctx.setResFile(null);
            try {
                PlanUtils.addInputsForView(pCtx);
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
            // Generate lineage info for create view statements
            // if LineageLogger hook is configured.
            // Add the transformation that computes the lineage information.
            Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
            if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
                ArrayList<Transform> transformations = new ArrayList<Transform>();
                transformations.add(new HiveOpConverterPostProc());
                transformations.add(new Generator());
                for (Transform t : transformations) {
                    pCtx = t.transform(pCtx);
                }
                // we just use view name as location.
                SessionState.get().getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
            }
            return;
        }
    }
    // 6. Generate table access stats if required
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
        TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
        setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
    }
    // 7. Perform Logical optimization
    if (LOG.isDebugEnabled()) {
        LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    Optimizer optm = new Optimizer();
    optm.setPctx(pCtx);
    optm.initialize(conf);
    pCtx = optm.optimize();
    if (pCtx.getColumnAccessInfo() != null) {
        // set ColumnAccessInfo for view column authorization
        setColumnAccessInfo(pCtx.getColumnAccessInfo());
    }
    FetchTask origFetchTask = pCtx.getFetchTask();
    if (LOG.isDebugEnabled()) {
        LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    // 8. Generate column access stats if required - wait until column pruning
    // takes place during optimization
    boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
    if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
        // view column access info is carried by this.getColumnAccessInfo().
        setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
    }
    // TEZ..)
    if (!ctx.getExplainLogical()) {
        TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
        compiler.init(queryState, console, db);
        compiler.compile(pCtx, rootTasks, inputs, outputs);
        fetchTask = pCtx.getFetchTask();
    }
    LOG.info("Completed plan generation");
    // 10. put accessed columns to readEntity
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
    }
    // 11. if desired check we're not going over partition scan limits
    if (!ctx.isExplainSkipExecution()) {
        enforceScanLimits(pCtx, origFetchTask);
    }
    return;
}

Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Optimizer(org.apache.hadoop.hive.ql.optimizer.Optimizer) ArrayList(java.util.ArrayList) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) HiveOpConverterPostProc(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc) Transform(org.apache.hadoop.hive.ql.optimizer.Transform) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Generator(org.apache.hadoop.hive.ql.optimizer.lineage.Generator)

Example 20 with JoinOperator

use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.

the class MapJoinProcessor method convertJoinOpMapJoinOp.

public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos, boolean noCheckOuterJoin, boolean adjustParentsChildren) throws SemanticException {
    MapJoinDesc mapJoinDescriptor = getMapJoinDesc(hconf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin, adjustParentsChildren);
    // reduce sink row resolver used to generate map join op
    RowSchema outputRS = op.getSchema();
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(op.getCompilationOpContext(), mapJoinDescriptor, new RowSchema(outputRS.getSignature()), op.getParentOperators());
    mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
    mapJoinOp.setColumnExprMap(colExprMap);
    List<Operator<? extends OperatorDesc>> childOps = op.getChildOperators();
    for (Operator<? extends OperatorDesc> childOp : childOps) {
        childOp.replaceParent(op, mapJoinOp);
    }
    mapJoinOp.setPosToAliasMap(op.getPosToAliasMap());
    mapJoinOp.setChildOperators(childOps);
    op.setChildOperators(null);
    op.setParentOperators(null);
    return mapJoinOp;
}

Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Aggregations

JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)36 Operator (org.apache.hadoop.hive.ql.exec.Operator)20 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)18 ArrayList (java.util.ArrayList)16 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)16 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)16 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)14 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)14 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)11 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)11 HashMap (java.util.HashMap)10 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)10 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)10 List (java.util.List)9 JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc)9 Path (org.apache.hadoop.fs.Path)8 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)8 HashSet (java.util.HashSet)7 LinkedHashMap (java.util.LinkedHashMap)7 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)7