Example 11 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GenMapRedUtils method splitTasks.

@SuppressWarnings("nls")
private static /**
 * Split two tasks by creating a temporary file between them.
 *
 * @param op reduce sink operator being processed
 * @param parentTask the parent task
 * @param childTask the child task
 * @param opProcCtx context
 */
void splitTasks(ReduceSinkOperator op, Task<? extends Serializable> parentTask, Task<? extends Serializable> childTask, GenMRProcContext opProcCtx) throws SemanticException {
    if (op.getNumParent() != 1) {
        throw new IllegalStateException("Expecting operator " + op + " to have one parent. " + "But found multiple parents : " + op.getParentOperators());
    }
    ParseContext parseCtx = opProcCtx.getParseCtx();
    parentTask.addDependentTask(childTask);
    // Root Task cannot depend on any other task, therefore childTask cannot be
    // a root Task
    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
    if (rootTasks.contains(childTask)) {
        rootTasks.remove(childTask);
    }
    // Generate the temporary file name
    Context baseCtx = parseCtx.getContext();
    Path taskTmpDir = baseCtx.getMRTmpPath();
    Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
    TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
    // Create the temporary file, its corresponding FileSinkOperator, and
    // its corresponding TableScanOperator.
    TableScanOperator tableScanOp = createTemporaryFile(parent, op, taskTmpDir, tt_desc, parseCtx);
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    mapCurrCtx.put(tableScanOp, new GenMapRedCtx(childTask, null));
    String streamDesc = taskTmpDir.toUri().toString();
    MapredWork cplan = (MapredWork) childTask.getWork();
    if (needsTagging(cplan.getReduceWork())) {
        Operator<? extends OperatorDesc> reducerOp = cplan.getReduceWork().getReducer();
        String id = null;
        if (reducerOp instanceof JoinOperator) {
            if (parseCtx.getJoinOps().contains(reducerOp)) {
                id = ((JoinOperator) reducerOp).getConf().getId();
            }
        } else if (reducerOp instanceof MapJoinOperator) {
            if (parseCtx.getMapJoinOps().contains(reducerOp)) {
                id = ((MapJoinOperator) reducerOp).getConf().getId();
            }
        } else if (reducerOp instanceof SMBMapJoinOperator) {
            if (parseCtx.getSmbMapJoinOps().contains(reducerOp)) {
                id = ((SMBMapJoinOperator) reducerOp).getConf().getId();
            }
        }
        if (id != null) {
            streamDesc = id + ":$INTNAME";
        } else {
            streamDesc = "$INTNAME";
        }
        String origStreamDesc = streamDesc;
        int pos = 0;
        while (cplan.getMapWork().getAliasToWork().get(streamDesc) != null) {
            streamDesc = origStreamDesc.concat(String.valueOf(++pos));
        }
        // TODO: Allocate work to remove the temporary files and make that
        // dependent on the redTask
        cplan.getReduceWork().setNeedsTagging(true);
    }
    // Add the path to alias mapping
    setTaskPlan(taskTmpDir, streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
    opProcCtx.setCurrTopOp(null);
    opProcCtx.setCurrAliasId(null);
    opProcCtx.setCurrTask(childTask);
    opProcCtx.addRootIfPossible(parentTask);
}
Also used : ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) Path(org.apache.hadoop.fs.Path) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) DemuxOperator(org.apache.hadoop.hive.ql.exec.DemuxOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask) Serializable(java.io.Serializable) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
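
The core of splitTasks is mechanical: the reduce sink's parent gets a FileSink writing to an MR scratch path, the child task gets a TableScan reading the same path, and that path is registered in the child plan under a fresh alias. The snippet below is a minimal, self-contained sketch (plain Java, not Hive code; all names are hypothetical) of just the alias-selection loop: the intermediate stream is named "$INTNAME", optionally prefixed with a join operator id, and a numeric suffix is appended until the alias is unused in the child plan's aliasToWork map.

import java.util.HashMap;
import java.util.Map;

/**
 * Minimal sketch of the alias-selection loop in splitTasks (hypothetical names).
 */
public class StreamAliasSketch {

    static String chooseStreamAlias(Map<String, Object> aliasToWork, String operatorId) {
        String streamDesc = (operatorId != null) ? operatorId + ":$INTNAME" : "$INTNAME";
        String origStreamDesc = streamDesc;
        int pos = 0;
        // Same structure as the while-loop above: bump the suffix until the alias is unused.
        while (aliasToWork.containsKey(streamDesc)) {
            streamDesc = origStreamDesc + (++pos);
        }
        return streamDesc;
    }

    public static void main(String[] args) {
        Map<String, Object> aliasToWork = new HashMap<>();
        aliasToWork.put("$INTNAME", new Object());   // already taken
        aliasToWork.put("$INTNAME1", new Object());  // also taken
        System.out.println(chooseStreamAlias(aliasToWork, null));     // prints $INTNAME2
        System.out.println(chooseStreamAlias(aliasToWork, "join_1")); // prints join_1:$INTNAME
    }
}

In the real method the chosen alias is then handed to setTaskPlan together with the temporary path and the intermediate TableDesc.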

Example 12 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GenMapRedUtils method joinPlan.

/**
 * Merge the current task into the old task for the reducer
 *
 * @param currTask
 *          the current task for the current reducer
 * @param oldTask
 *          the old task for the current reducer
 * @param opProcCtx
 *          processing context
 */
public static void joinPlan(Task<? extends Serializable> currTask, Task<? extends Serializable> oldTask, GenMRProcContext opProcCtx) throws SemanticException {
    assert currTask != null && oldTask != null;
    TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
    List<Task<? extends Serializable>> parTasks = null;
    // terminate the old task and make current task dependent on it
    if (currTask.getParentTasks() != null && !currTask.getParentTasks().isEmpty()) {
        parTasks = new ArrayList<Task<? extends Serializable>>();
        parTasks.addAll(currTask.getParentTasks());
        Object[] parTaskArr = parTasks.toArray();
        for (Object element : parTaskArr) {
            ((Task<? extends Serializable>) element).removeDependentTask(currTask);
        }
    }
    if (currTopOp != null) {
        mergeInput(currTopOp, opProcCtx, oldTask, false);
    }
    if (parTasks != null) {
        for (Task<? extends Serializable> parTask : parTasks) {
            parTask.addDependentTask(oldTask);
        }
    }
    if (oldTask instanceof MapRedTask && currTask instanceof MapRedTask) {
        ((MapRedTask) currTask).getWork().getMapWork().mergingInto(((MapRedTask) oldTask).getWork().getMapWork());
    }
    opProcCtx.setCurrTopOp(null);
    opProcCtx.setCurrTask(oldTask);
}
Also used : MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask) Serializable(java.io.Serializable)
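
joinPlan is essentially a dependency re-wiring step: the current task is cut loose from its parents, its top-level table scan input is merged into the old task, and the former parents become parents of the old task instead. The toy model below (plain Java, no Hive types; ToyTask and mergeInto are made-up names) illustrates only that re-parenting, including why the parent list is copied before iterating.

import java.util.ArrayList;
import java.util.List;

/**
 * Toy illustration of the dependency re-wiring in joinPlan (not Hive's Task API).
 */
class ToyTask {
    final String name;
    final List<ToyTask> parents = new ArrayList<>();
    final List<ToyTask> children = new ArrayList<>();

    ToyTask(String name) { this.name = name; }

    void addDependentTask(ToyTask child) {
        children.add(child);
        child.parents.add(this);
    }

    void removeDependentTask(ToyTask child) {
        children.remove(child);
        child.parents.remove(this);
    }

    /** Mirrors joinPlan: copy the parent list first, because detaching mutates it. */
    static void mergeInto(ToyTask current, ToyTask old) {
        List<ToyTask> parentsCopy = new ArrayList<>(current.parents);
        for (ToyTask parent : parentsCopy) {
            parent.removeDependentTask(current);
        }
        for (ToyTask parent : parentsCopy) {
            parent.addDependentTask(old);
        }
    }

    public static void main(String[] args) {
        ToyTask p1 = new ToyTask("p1"), p2 = new ToyTask("p2");
        ToyTask current = new ToyTask("current"), old = new ToyTask("old");
        p1.addDependentTask(current);
        p2.addDependentTask(current);
        mergeInto(current, old);
        System.out.println(old.parents.size());      // 2
        System.out.println(current.parents.size());  // 0
    }
}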

Example 13 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GenMapRedUtils method createMRWorkForMergingFiles.

/**
 * @param fsInput The FileSink operator.
 * @param finalName the final destination path the merge job should output.
 * @param dependencyTask
 * @param mvTasks
 * @param conf
 * @param currTask
 * @param lineageState
 * @throws SemanticException
 *
 * create a Map-only merge job using CombineHiveInputFormat for all partitions with
 * following operators:
 *          MR job J0:
 *          ...
 *          |
 *          v
 *          FileSinkOperator_1 (fsInput)
 *          |
 *          v
 *          Merge job J1:
 *          |
 *          v
 *          TableScan (using CombineHiveInputFormat) (tsMerge)
 *          |
 *          v
 *          FileSinkOperator (fsMerge)
 *
 *          Here the pathToPartitionInfo & pathToAlias will remain the same, which means the
 *          paths do not contain the dynamic partitions (their parent). So after the dynamic
 *          partitions are created (after the first job finishes, before the moveTask or
 *          ConditionalTask starts), we need to change the pathToPartitionInfo & pathToAlias
 *          to include the dynamic partition directories.
 */
public static void createMRWorkForMergingFiles(FileSinkOperator fsInput, Path finalName, DependencyCollectionTask dependencyTask, List<Task<MoveWork>> mvTasks, HiveConf conf, Task<? extends Serializable> currTask, LineageState lineageState) throws SemanticException {
    // 
    // 1. create the operator tree
    // 
    FileSinkDesc fsInputDesc = fsInput.getConf();
    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
        Utilities.FILE_OP_LOGGER.trace("Creating merge work from " + System.identityHashCode(fsInput) + " with write ID " + (fsInputDesc.isMmTable() ? fsInputDesc.getTableWriteId() : null) + " into " + finalName);
    }
    boolean isBlockMerge = (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) || (conf.getBoolVar(ConfVars.HIVEMERGEORCFILESTRIPELEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class));
    RowSchema inputRS = fsInput.getSchema();
    Long srcMmWriteId = fsInputDesc.isMmTable() ? fsInputDesc.getTableWriteId() : null;
    FileSinkDesc fsOutputDesc = null;
    TableScanOperator tsMerge = null;
    if (!isBlockMerge) {
        // Create a TableScan operator
        tsMerge = GenMapRedUtils.createTemporaryTableScanOperator(fsInput.getCompilationOpContext(), inputRS);
        // Create a FileSink operator
        TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
        Path mergeDest = srcMmWriteId == null ? finalName : finalName.getParent();
        fsOutputDesc = new FileSinkDesc(mergeDest, ts, conf.getBoolVar(ConfVars.COMPRESSRESULT));
        fsOutputDesc.setMmWriteId(srcMmWriteId);
        fsOutputDesc.setIsMerge(true);
        // Create and attach the filesink for the merge.
        OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge);
    }
    // If the input FileSinkOperator has dynamic partitioning enabled, the tsMerge input schema
    // needs to include the partition columns, and the fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to be dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
        // adding DP ColumnInfo to the RowSchema signature
        ArrayList<ColumnInfo> signature = inputRS.getSignature();
        String tblAlias = fsInputDesc.getTableInfo().getTableName();
        for (String dpCol : dpCtx.getDPColNames()) {
            // All partition column types should be string; the partition column is a virtual column.
            ColumnInfo colInfo = new ColumnInfo(dpCol, TypeInfoFactory.stringTypeInfo, tblAlias, true);
            signature.add(colInfo);
        }
        inputRS.setSignature(signature);
        if (!isBlockMerge) {
            // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
            DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
            fsOutputDesc.setDynPartCtx(dpCtx2);
        }
        // update the FileSinkOperator to include partition columns
        usePartitionColumns(fsInputDesc.getTableInfo().getProperties(), dpCtx.getDPColNames());
    } else {
        // non-partitioned table
        fsInputDesc.getTableInfo().getProperties().remove(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    }
    // 
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    // 
    Path inputDirName = fsInputDesc.getMergeInputDirName();
    MapWork cplan;
    Serializable work;
    if (isBlockMerge) {
        cplan = GenMapRedUtils.createMergeTask(fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, fsInput.getCompilationOpContext());
        if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
            work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
            cplan.setName("File Merge");
            ((TezWork) work).add(cplan);
        } else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
            work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
            cplan.setName("Spark Merge File Work");
            ((SparkWork) work).add(cplan);
        } else {
            work = cplan;
        }
    } else {
        cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
        if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
            work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID), conf);
            cplan.setName("File Merge");
            ((TezWork) work).add(cplan);
        } else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
            work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
            cplan.setName("Spark Merge File Work");
            ((SparkWork) work).add(cplan);
        } else {
            work = new MapredWork();
            ((MapredWork) work).setMapWork(cplan);
        }
    }
    // use CombineHiveInputFormat for map-only merging
    cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
    // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
    // know if merge MR2 will be triggered at execution time
    MoveWork dummyMv = null;
    if (srcMmWriteId == null) {
        // Only create the movework for non-MM table. No action needed for a MM table.
        dummyMv = new MoveWork(null, null, null, new LoadFileDesc(inputDirName, finalName, true, null, null, false), false);
    }
    // Use the original fsOp path here in case of MM - while the new FSOP merges files inside the
    // MM directory, the original MoveTask still commits based on the parent. Note that this path
    // can only be triggered for a merge that's part of insert for now; MM tables do not support
    // concatenate. Keeping the old logic for non-MM tables with temp directories and stuff.
    Path fsopPath = srcMmWriteId != null ? fsInputDesc.getFinalDirName() : finalName;
    Task<MoveWork> mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fsopPath, fsInputDesc.isMmTable());
    ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work, fsInputDesc.getMergeInputDirName(), finalName, mvTask, dependencyTask, lineageState);
    // keep the dynamic partition context in conditional task resolver context
    ConditionalResolverMergeFilesCtx mrCtx = (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx();
    mrCtx.setDPCtx(fsInputDesc.getDynPartCtx());
    mrCtx.setLbCtx(fsInputDesc.getLbCtx());
}
Also used : Path(org.apache.hadoop.fs.Path) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Serializable(java.io.Serializable) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ConditionalResolverMergeFilesCtx(org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles.ConditionalResolverMergeFilesCtx) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)
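
Two decisions drive the shape of the merge work above: whether a block/stripe-level merge can be used at all (RCFile or ORC input with the matching HiveConf flag), and how the merge stage is named for the configured execution engine. The sketch below restates those decisions in isolation; it is a hedged illustration with hypothetical helper names, not Hive's API.

/**
 * Condensed sketch of two decisions made in createMRWorkForMergingFiles (hypothetical names).
 */
public class MergeStrategySketch {

    /** Mirrors the isBlockMerge expression above: the flag must be on AND the format must match. */
    static boolean useBlockMerge(boolean mergeRcFileBlockLevel, boolean mergeOrcFileStripeLevel,
                                 String inputFormatClassName) {
        return (mergeRcFileBlockLevel && inputFormatClassName.endsWith(".RCFileInputFormat"))
            || (mergeOrcFileStripeLevel && inputFormatClassName.endsWith(".OrcInputFormat"));
    }

    /** Stage names used above; the plain MR branch wraps the MapWork without renaming it. */
    static String mergeStageName(String executionEngine) {
        if ("tez".equals(executionEngine)) {
            return "File Merge";
        } else if ("spark".equals(executionEngine)) {
            return "Spark Merge File Work";
        }
        return null;
    }

    public static void main(String[] args) {
        System.out.println(useBlockMerge(false, true, "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")); // true
        System.out.println(mergeStageName("spark")); // Spark Merge File Work
    }
}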

Example 14 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class SharedWorkOptimizer method areMergeable.

// FIXME: probably this should also be integrated with the isSame() logic
private static boolean areMergeable(ParseContext pctx, SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp1, TableScanOperator tsOp2) throws SemanticException {
    // First we check if the two table scan operators can actually be merged
    // If schemas do not match, we currently do not merge
    List<String> prevTsOpNeededColumns = tsOp1.getNeededColumns();
    List<String> tsOpNeededColumns = tsOp2.getNeededColumns();
    if (prevTsOpNeededColumns.size() != tsOpNeededColumns.size()) {
        return false;
    }
    boolean notEqual = false;
    for (int i = 0; i < prevTsOpNeededColumns.size(); i++) {
        if (!prevTsOpNeededColumns.get(i).equals(tsOpNeededColumns.get(i))) {
            notEqual = true;
            break;
        }
    }
    if (notEqual) {
        return false;
    }
    // If row limit does not match, we currently do not merge
    if (tsOp1.getConf().getRowLimit() != tsOp2.getConf().getRowLimit()) {
        return false;
    }
    // If partitions do not match, we currently do not merge
    PrunedPartitionList prevTsOpPPList = pctx.getPrunedPartitions(tsOp1);
    PrunedPartitionList tsOpPPList = pctx.getPrunedPartitions(tsOp2);
    if (!prevTsOpPPList.getPartitions().equals(tsOpPPList.getPartitions())) {
        return false;
    }
    // If there is DPP (dynamic partition pruning), check whether it actually refers to the same
    // target, column, etc. Further, the DPP value needs to be generated from the same subtree.
    List<Operator<?>> dppsOp1 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp1));
    List<Operator<?>> dppsOp2 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp2));
    if (dppsOp1.isEmpty() && dppsOp2.isEmpty()) {
        return true;
    }
    for (int i = 0; i < dppsOp1.size(); i++) {
        Operator<?> op = dppsOp1.get(i);
        if (op instanceof ReduceSinkOperator) {
            Set<Operator<?>> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op);
            if (ascendants.contains(tsOp2)) {
                // This should not happen, we cannot merge
                return false;
            }
        }
    }
    for (int i = 0; i < dppsOp2.size(); i++) {
        Operator<?> op = dppsOp2.get(i);
        if (op instanceof ReduceSinkOperator) {
            Set<Operator<?>> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op);
            if (ascendants.contains(tsOp1)) {
                // This should not happen, we cannot merge
                return false;
            }
        }
    }
    if (dppsOp1.size() != dppsOp2.size()) {
        // Only first or second operator contains DPP pruning
        return false;
    }
    // Check if DPP branches are equal
    BitSet bs = new BitSet();
    for (int i = 0; i < dppsOp1.size(); i++) {
        Operator<?> dppOp1 = dppsOp1.get(i);
        for (int j = 0; j < dppsOp2.size(); j++) {
            if (!bs.get(j)) {
                // If not visited yet
                Operator<?> dppOp2 = dppsOp2.get(j);
                if (compareAndGatherOps(pctx, dppOp1, dppOp2) != null) {
                    // The DPP operator/branch are equal
                    bs.set(j);
                    break;
                }
            }
        }
        if (bs.cardinality() < i + 1) {
            return false;
        }
    }
    return true;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) BitSet(java.util.BitSet)
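
The last loop in areMergeable solves a small matching problem: every DPP branch feeding the first table scan must be paired with a distinct, not-yet-matched branch feeding the second one, using compareAndGatherOps as the equivalence test and a BitSet to remember which branches are already taken. The standalone sketch below (generic Java; allMatched and the BiPredicate are stand-ins, not Hive code) captures that pattern.

import java.util.BitSet;
import java.util.List;
import java.util.function.BiPredicate;

/**
 * Standalone sketch of the BitSet matching loop at the end of areMergeable.
 */
public class BranchMatcher {

    static <T> boolean allMatched(List<T> first, List<T> second, BiPredicate<T, T> equivalent) {
        BitSet used = new BitSet();
        for (int i = 0; i < first.size(); i++) {
            for (int j = 0; j < second.size(); j++) {
                if (!used.get(j) && equivalent.test(first.get(i), second.get(j))) {
                    used.set(j);  // this element of the second list is now consumed
                    break;
                }
            }
            if (used.cardinality() < i + 1) {
                return false;  // element i found no unused partner
            }
        }
        return true;
    }

    public static void main(String[] args) {
        System.out.println(allMatched(List.of("a", "b"), List.of("b", "a"), String::equals)); // true
        System.out.println(allMatched(List.of("a", "a"), List.of("a", "b"), String::equals)); // false
    }
}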

Example 15 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class SharedWorkOptimizer method pushFilterToTopOfTableScan.

private static void pushFilterToTopOfTableScan(SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp) throws UDFArgumentException {
    ExprNodeGenericFuncDesc tableScanExprNode = tsOp.getConf().getFilterExpr();
    List<Operator<? extends OperatorDesc>> allChildren = Lists.newArrayList(tsOp.getChildOperators());
    for (Operator<? extends OperatorDesc> op : allChildren) {
        if (op instanceof FilterOperator) {
            FilterOperator filterOp = (FilterOperator) op;
            ExprNodeDesc filterExprNode = filterOp.getConf().getPredicate();
            if (tableScanExprNode.isSame(filterExprNode)) {
                // We do not need to do anything
                return;
            }
            if (tableScanExprNode.getGenericUDF() instanceof GenericUDFOPOr) {
                for (ExprNodeDesc childExprNode : tableScanExprNode.getChildren()) {
                    if (childExprNode.isSame(filterExprNode)) {
                        // so probably we pushed previously
                        return;
                    }
                }
            }
            ExprNodeGenericFuncDesc newPred = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPAnd(), Arrays.<ExprNodeDesc>asList(tableScanExprNode.clone(), filterExprNode));
            filterOp.getConf().setPredicate(newPred);
        } else {
            Operator<FilterDesc> newOp = OperatorFactory.get(tsOp.getCompilationOpContext(), new FilterDesc(tableScanExprNode.clone(), false), new RowSchema(tsOp.getSchema().getSignature()));
            tsOp.replaceChild(op, newOp);
            newOp.getParentOperators().add(tsOp);
            op.replaceParent(tsOp, newOp);
            newOp.getChildOperators().add(op);
            // Add to cache (same group as tsOp)
            optimizerCache.putIfWorkExists(newOp, tsOp);
        }
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)
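
pushFilterToTopOfTableScan has two cases per child: if the child is already a FilterOperator, the table scan's filter expression is AND-ed into its predicate unless it is already present (either verbatim or as one disjunct of an existing OR); otherwise a brand-new Filter operator is spliced in between the scan and the child. The miniature model below mimics only the predicate-merging case with toy Java 17 record types; it is an assumption-laden sketch, not Hive's ExprNodeDesc/GenericUDF API.

import java.util.List;

/**
 * Miniature model of the predicate handling in pushFilterToTopOfTableScan (toy types).
 */
public class FilterPushSketch {

    interface Expr { boolean isSame(Expr other); }

    record Leaf(String name) implements Expr {
        public boolean isSame(Expr other) { return other instanceof Leaf l && l.name.equals(name); }
    }

    record Or(List<Expr> disjuncts) implements Expr {
        public boolean isSame(Expr other) { return this == other; }  // identity only, for brevity
    }

    record And(Expr left, Expr right) implements Expr {
        public boolean isSame(Expr other) { return this == other; }  // identity only, for brevity
    }

    /** Returns the (possibly unchanged) predicate for an existing child filter. */
    static Expr mergeScanFilter(Expr scanFilter, Expr childPredicate) {
        if (scanFilter.isSame(childPredicate)) {
            return childPredicate;                  // already identical, nothing to do
        }
        if (scanFilter instanceof Or or) {
            for (Expr disjunct : or.disjuncts()) {
                if (disjunct.isSame(childPredicate)) {
                    return childPredicate;          // one disjunct matches, probably pushed before
                }
            }
        }
        return new And(scanFilter, childPredicate); // otherwise AND the scan filter in
    }

    public static void main(String[] args) {
        Expr scanFilter = new Leaf("ds = '2024-01-01'");
        System.out.println(mergeScanFilter(scanFilter, new Leaf("x > 10")));          // wrapped in And
        System.out.println(mergeScanFilter(scanFilter, new Leaf("ds = '2024-01-01'"))); // unchanged
    }
}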

Aggregations

TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 133
Operator (org.apache.hadoop.hive.ql.exec.Operator) 52
ArrayList (java.util.ArrayList) 47
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 44
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator) 36
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator) 35
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator) 32
HashMap (java.util.HashMap) 30
Path (org.apache.hadoop.fs.Path) 30
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator) 29
Table (org.apache.hadoop.hive.ql.metadata.Table) 26
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator) 25
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) 24
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator) 24
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator) 23
LinkedHashMap (java.util.LinkedHashMap) 22
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 22
MapWork (org.apache.hadoop.hive.ql.plan.MapWork) 22
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc) 22
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator) 21