
Example 1 with ParseContext

Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

The class Driver, method getTablePartitionUsedColumns.

private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem, Map<Table, List<String>> tab2Cols, Map<Partition, List<String>> part2Cols, Map<String, Boolean> tableUsePartLevelAuth) throws HiveException {
    // table to columns mapping (tab2Cols)
    if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) {
        SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
        ParseContext parseCtx = querySem.getParseContext();
        for (Map.Entry<String, TableScanOperator> topOpMap : parseCtx.getTopOps().entrySet()) {
            TableScanOperator tableScanOp = topOpMap.getValue();
            if (!tableScanOp.isInsideView()) {
                Table tbl = tableScanOp.getConf().getTableMetadata();
                List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
                List<FieldSchema> columns = tbl.getCols();
                List<String> cols = new ArrayList<String>();
                for (int i = 0; i < neededColumnIds.size(); i++) {
                    cols.add(columns.get(neededColumnIds.get(i)).getName());
                }
                // Use partition-level columns when the table is partitioned and partition-level
                // authorization is enabled for it; otherwise record the columns at the table level.
                if (tbl.isPartitioned() && Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
                    String alias_id = topOpMap.getKey();
                    PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, alias_id);
                    Set<Partition> parts = partsList.getPartitions();
                    for (Partition part : parts) {
                        List<String> existingCols = part2Cols.get(part);
                        if (existingCols == null) {
                            existingCols = new ArrayList<String>();
                        }
                        existingCols.addAll(cols);
                        part2Cols.put(part, existingCols);
                    }
                } else {
                    List<String> existingCols = tab2Cols.get(tbl);
                    if (existingCols == null) {
                        existingCols = new ArrayList<String>();
                    }
                    existingCols.addAll(cols);
                    tab2Cols.put(tbl, existingCols);
                }
            }
        }
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) ImportSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
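The get/null-check/put sequence used for both part2Cols and tab2Cols above can be written more compactly with Map.computeIfAbsent. A minimal, self-contained sketch, using plain String keys as hypothetical stand-ins for Hive's Table and Partition objects:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ColumnAccumulator {
    // Equivalent of the get/null-check/put sequence in the example: create the value
    // list on first use, then append the needed columns for the key.
    static void addColumns(Map<String, List<String>> keyToCols, String key, List<String> cols) {
        keyToCols.computeIfAbsent(key, k -> new ArrayList<>()).addAll(cols);
    }

    public static void main(String[] args) {
        Map<String, List<String>> tab2Cols = new HashMap<>();
        addColumns(tab2Cols, "src", List.of("key"));
        addColumns(tab2Cols, "src", List.of("value"));
        System.out.println(tab2Cols); // prints {src=[key, value]}
    }
}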

Example 2 with ParseContext

Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

The class SparkCompiler, method generateTaskTree.

/**
 * TODO: need to turn on rules that are commented out and add more if necessary.
 */
@Override
protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
    GenSparkUtils utils = GenSparkUtils.getUtils();
    utils.resetSequenceNumber();
    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    GenSparkProcContext procCtx = new GenSparkProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs, pCtx.getTopOps());
    // -------------------------------- First Pass ---------------------------------- //
    // Identify SparkPartitionPruningSinkOperators, and break OP tree if necessary
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Clone OP tree for PartitionPruningSink", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), new SplitOpTreeForDPP());
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    // -------------------------------- Second Pass ---------------------------------- //
    // Process operator tree in two steps: first we process the extra op trees generated
    // in the first pass. Then we process the main op tree, and the result task will depend
    // on the task generated in the first pass.
    topNodes.clear();
    topNodes.addAll(procCtx.topOps.values());
    generateTaskTreeHelper(procCtx, topNodes);
    // If the cloned pruning table scan set is not empty, generate a separate task to collect
    // the partitions used.
    if (!procCtx.clonedPruningTableScanSet.isEmpty()) {
        SparkTask pruningTask = SparkUtilities.createSparkTask(conf);
        SparkTask mainTask = procCtx.currentTask;
        pruningTask.addDependentTask(procCtx.currentTask);
        procCtx.rootTasks.remove(procCtx.currentTask);
        procCtx.rootTasks.add(pruningTask);
        procCtx.currentTask = pruningTask;
        topNodes.clear();
        topNodes.addAll(procCtx.clonedPruningTableScanSet);
        generateTaskTreeHelper(procCtx, topNodes);
        procCtx.currentTask = mainTask;
    }
    // We still need to clone some operator plans and remove union operators.
    for (BaseWork w : procCtx.workWithUnionOperators) {
        GenSparkUtils.getUtils().removeUnionOperators(procCtx, w);
    }
    // we need to fill MapWork with 'local' work and bucket information for SMB Join.
    GenSparkUtils.getUtils().annotateMapWork(procCtx);
    // finally make sure the file sink operators are set up right
    for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
        GenSparkUtils.getUtils().processFileSink(procCtx, fileSink);
    }
    // Process partition pruning sinks
    for (Operator<?> prunerSink : procCtx.pruningSinkSet) {
        utils.processPartitionPruningSink(procCtx, (SparkPartitionPruningSinkOperator) prunerSink);
    }
    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) LinkedHashMap(java.util.LinkedHashMap) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Rule(org.apache.hadoop.hive.ql.lib.Rule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)
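The first pass above is driven by Hive's generic rule/dispatcher/walker machinery: a map of rules to node processors, a default processor for everything else, and a walker that visits operators and dispatches each one. A simplified, self-contained sketch of that pattern in plain Java; the Op record, MiniRuleWalker class, and plain regex matching are hypothetical illustrations, not the Hive lib API (which matches rules against the operator-name stack):

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.regex.Pattern;

public class MiniRuleWalker {
    // Tiny stand-in for an operator node in a plan tree.
    record Op(String name, List<Op> children) {}

    private final Map<Pattern, Consumer<Op>> rules = new LinkedHashMap<>();
    private final Consumer<Op> defaultProc;

    MiniRuleWalker(Consumer<Op> defaultProc) {
        this.defaultProc = defaultProc;
    }

    void addRule(String regex, Consumer<Op> proc) {
        rules.put(Pattern.compile(regex), proc);
    }

    // Depth-first walk: dispatch each node to the first rule whose pattern matches
    // its name, falling back to the default processor.
    void walk(Op node) {
        rules.entrySet().stream()
             .filter(e -> e.getKey().matcher(node.name()).matches())
             .findFirst()
             .map(Map.Entry::getValue)
             .orElse(defaultProc)
             .accept(node);
        node.children().forEach(this::walk);
    }

    public static void main(String[] args) {
        MiniRuleWalker w = new MiniRuleWalker(op -> System.out.println("default: " + op.name()));
        w.addRule("TS.*", op -> System.out.println("table scan rule: " + op.name()));
        Op plan = new Op("TS", List.of(new Op("FIL", List.of()), new Op("FS", List.of())));
        w.walk(plan);
    }
}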

Example 3 with ParseContext

Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

The class GenMapRedUtils, method splitTasks.

@SuppressWarnings("nls")
private static /**
 * Split two tasks by creating a temporary file between them.
 *
 * @param op reduce sink operator being processed
 * @param parentTask the parent task
 * @param childTask the child task
 * @param opProcCtx context
 */
void splitTasks(ReduceSinkOperator op, Task<? extends Serializable> parentTask, Task<? extends Serializable> childTask, GenMRProcContext opProcCtx) throws SemanticException {
    if (op.getNumParent() != 1) {
        throw new IllegalStateException("Expecting operator " + op + " to have one parent. " + "But found multiple parents : " + op.getParentOperators());
    }
    ParseContext parseCtx = opProcCtx.getParseCtx();
    parentTask.addDependentTask(childTask);
    // Root Task cannot depend on any other task, therefore childTask cannot be
    // a root Task
    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
    if (rootTasks.contains(childTask)) {
        rootTasks.remove(childTask);
    }
    // Generate the temporary file name
    Context baseCtx = parseCtx.getContext();
    Path taskTmpDir = baseCtx.getMRTmpPath();
    Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
    TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
    // Create the temporary file, its corresponding FileSinkOperator, and
    // its corresponding TableScanOperator.
    TableScanOperator tableScanOp = createTemporaryFile(parent, op, taskTmpDir, tt_desc, parseCtx);
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    mapCurrCtx.put(tableScanOp, new GenMapRedCtx(childTask, null));
    String streamDesc = taskTmpDir.toUri().toString();
    MapredWork cplan = (MapredWork) childTask.getWork();
    if (needsTagging(cplan.getReduceWork())) {
        Operator<? extends OperatorDesc> reducerOp = cplan.getReduceWork().getReducer();
        String id = null;
        if (reducerOp instanceof JoinOperator) {
            if (parseCtx.getJoinOps().contains(reducerOp)) {
                id = ((JoinOperator) reducerOp).getConf().getId();
            }
        } else if (reducerOp instanceof MapJoinOperator) {
            if (parseCtx.getMapJoinOps().contains(reducerOp)) {
                id = ((MapJoinOperator) reducerOp).getConf().getId();
            }
        } else if (reducerOp instanceof SMBMapJoinOperator) {
            if (parseCtx.getSmbMapJoinOps().contains(reducerOp)) {
                id = ((SMBMapJoinOperator) reducerOp).getConf().getId();
            }
        }
        if (id != null) {
            streamDesc = id + ":$INTNAME";
        } else {
            streamDesc = "$INTNAME";
        }
        String origStreamDesc = streamDesc;
        int pos = 0;
        while (cplan.getMapWork().getAliasToWork().get(streamDesc) != null) {
            streamDesc = origStreamDesc.concat(String.valueOf(++pos));
        }
        // TODO: Allocate work to remove the temporary files and make that
        // dependent on the redTask
        cplan.getReduceWork().setNeedsTagging(true);
    }
    // Add the path to alias mapping
    setTaskPlan(taskTmpDir, streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
    opProcCtx.setCurrTopOp(null);
    opProcCtx.setCurrAliasId(null);
    opProcCtx.setCurrTask(childTask);
    opProcCtx.addRootIfPossible(parentTask);
}
Also used : ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) Path(org.apache.hadoop.fs.Path) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) DemuxOperator(org.apache.hadoop.hive.ql.exec.DemuxOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask) Serializable(java.io.Serializable) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
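The while loop that derives streamDesc implements a small pattern worth calling out: making an alias unique against the existing alias-to-work map by appending an increasing numeric suffix. A standalone sketch of just that helper, with illustrative names:

import java.util.HashMap;
import java.util.Map;

public class AliasUtil {
    // Mirrors the loop in splitTasks: append an increasing numeric suffix until the
    // alias no longer collides with an existing entry in the alias map.
    static String uniqueAlias(String base, Map<String, ?> aliasToWork) {
        String alias = base;
        int pos = 0;
        while (aliasToWork.containsKey(alias)) {
            alias = base + (++pos);
        }
        return alias;
    }

    public static void main(String[] args) {
        Map<String, Object> aliasToWork = new HashMap<>();
        aliasToWork.put("$INTNAME", new Object());
        aliasToWork.put("$INTNAME1", new Object());
        System.out.println(uniqueAlias("$INTNAME", aliasToWork)); // prints $INTNAME2
    }
}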

Example 4 with ParseContext

Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

The class GenMapRedUtils, method createMoveTask.

/**
 * Create and add any dependent move tasks.
 *
 * @param currTask the task that produces the file sink output
 * @param chDir whether the file sink output should be redirected to a temporary directory
 * @param fsOp the file sink operator being processed
 * @param parseCtx parse context
 * @param mvTasks candidate move tasks that may consume the file sink output
 * @param hconf Hive configuration
 * @param dependencyTask optional dependency collection task linking the current task to the move tasks
 * @return the merge input directory when chDir is true, otherwise null
 */
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir, FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
    Path dest = null;
    FileSinkDesc fileSinkDesc = fsOp.getConf();
    boolean isMmTable = fileSinkDesc.isMmTable();
    if (chDir) {
        dest = fileSinkDesc.getMergeInputDirName();
        if (!isMmTable) {
            // generate the temporary file
            // it must be on the same file system as the current destination
            Context baseCtx = parseCtx.getContext();
            // Create the required temporary file in the HDFS location if the destination
            // path of the FileSinkOperator table is a blobstore path.
            Path tmpDir = baseCtx.getTempDirForFinalJobPath(fileSinkDesc.getDestPath());
            // Change all the linked file sink descriptors
            if (fileSinkDesc.isLinkedFileSink()) {
                for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
                    fsConf.setDirName(new Path(tmpDir, fsConf.getDirName().getName()));
                    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                        Utilities.FILE_OP_LOGGER.trace("createMoveTask setting tmpDir for LinkedFileSink chDir " + fsConf.getDirName() + "; dest was " + fileSinkDesc.getDestPath());
                    }
                }
            } else {
                fileSinkDesc.setDirName(tmpDir);
                if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                    Utilities.FILE_OP_LOGGER.trace("createMoveTask setting tmpDir chDir " + tmpDir + "; dest was " + fileSinkDesc.getDestPath());
                }
            }
        }
    }
    Task<MoveWork> mvTask = null;
    if (!chDir) {
        mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fsOp.getConf().getFinalDirName(), fsOp.getConf().isMmTable());
    }
    // Set the move task to be dependent on the current task
    if (mvTask != null) {
        GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
    }
    return dest;
}
Also used : Path(org.apache.hadoop.fs.Path) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)
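When chDir is false, the method looks up an existing move task whose source matches the file sink's final output directory and, if one is found, makes it depend on the current task. A simplified, self-contained sketch of that lookup, with a hypothetical MoveTask record standing in for Hive's Task<MoveWork>:

import java.util.List;
import java.util.Objects;

public class MoveTaskLookup {
    // Hypothetical stand-in for a move task that reads from a source directory.
    record MoveTask(String sourceDir) {}

    // Find the candidate move task whose source matches the file sink's final directory,
    // or null if none exists (analogous to findMoveTaskForFsopOutput above).
    static MoveTask findMoveTaskForOutput(List<MoveTask> mvTasks, String finalDirName) {
        return mvTasks.stream()
                      .filter(t -> Objects.equals(t.sourceDir(), finalDirName))
                      .findFirst()
                      .orElse(null);
    }

    public static void main(String[] args) {
        List<MoveTask> mvTasks = List.of(new MoveTask("/tmp/a"), new MoveTask("/warehouse/t"));
        System.out.println(findMoveTaskForOutput(mvTasks, "/warehouse/t"));
    }
}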

Example 5 with ParseContext

Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

The class ExplainTask, method getLocks.

private JSONObject getLocks(PrintStream out, ExplainWork work) {
    JSONObject jsonObject = new JSONObject(new LinkedHashMap<>());
    boolean jsonOutput = work.isFormatted();
    if (jsonOutput) {
        out = null;
    }
    Operation operation = Optional.of(work).map(ExplainWork::getParseContext).map(ParseContext::getContext).map(Context::getOperation).orElse(Operation.OTHER);
    List<LockComponent> lockComponents = AcidUtils.makeLockComponents(work.getOutputs(), work.getInputs(), operation, conf);
    if (null != out) {
        out.print("LOCK INFORMATION:\n");
    }
    List<ExplainLockDesc> locks = new ArrayList<>(lockComponents.size());
    for (LockComponent component : lockComponents) {
        ExplainLockDesc lockDesc = new ExplainLockDesc(component);
        if (null != out) {
            out.print(lockDesc.getFullName());
            out.print(" -> ");
            out.print(lockDesc.getLockType());
            out.print('\n');
        } else {
            locks.add(lockDesc);
        }
    }
    if (jsonOutput) {
        jsonObject.put("LOCK INFORMATION:", locks);
    }
    return jsonObject;
}
Also used : LockComponent(org.apache.hadoop.hive.metastore.api.LockComponent) JSONObject(org.json.JSONObject) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) ArrayList(java.util.ArrayList) Operation(org.apache.hadoop.hive.ql.Context.Operation) HiveOperation(org.apache.hadoop.hive.ql.plan.HiveOperation) ExplainLockDesc(org.apache.hadoop.hive.ql.plan.ExplainLockDesc)
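The Operation lookup here uses an Optional chain to navigate a path of possibly-null references (work, then parse context, then context, then operation) with a single fallback. A standalone sketch of the same pattern, with hypothetical record types in place of ExplainWork, ParseContext, and Context:

import java.util.Optional;

public class OptionalChainDemo {
    // Hypothetical stand-ins for Context, ParseContext, and ExplainWork.
    record Ctx(String operation) {}
    record Pctx(Ctx ctx) {}
    record Work(Pctx parseContext) {}

    // Each map() step short-circuits to an empty Optional when a reference is null,
    // so orElse() supplies the fallback exactly once at the end of the chain.
    static String operationOf(Work work) {
        return Optional.ofNullable(work)
                       .map(Work::parseContext)
                       .map(Pctx::ctx)
                       .map(Ctx::operation)
                       .orElse("OTHER");
    }

    public static void main(String[] args) {
        System.out.println(operationOf(new Work(null)));                       // OTHER
        System.out.println(operationOf(new Work(new Pctx(new Ctx("QUERY"))))); // QUERY
    }
}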

Aggregations

ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 35 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 15 uses
ArrayList (java.util.ArrayList): 14 uses
Path (org.apache.hadoop.fs.Path): 12 uses
Context (org.apache.hadoop.hive.ql.Context): 12 uses
Node (org.apache.hadoop.hive.ql.lib.Node): 10 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 8 uses
LinkedHashMap (java.util.LinkedHashMap): 7 uses
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 7 uses
Task (org.apache.hadoop.hive.ql.exec.Task): 7 uses
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 7 uses
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 7 uses
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 7 uses
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 7 uses
SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher): 6 uses
SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker): 6 uses
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 6 uses
Table (org.apache.hadoop.hive.ql.metadata.Table): 6 uses
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 6 uses
FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc): 6 uses