
Example 1 with Task

Use of org.apache.hadoop.hive.ql.exec.Task in project hive by apache.

From the class AuthorizationTestUtil, method analyze:

/**
   * Create a DDLWork from the given AST.
   * @param ast the parsed DDL statement
   * @param queryState the query state whose HiveConf is used for analysis
   * @param db the Hive database handle
   * @return the DDLWork carried by the single root task
   * @throws Exception if semantic analysis fails
   */
public static DDLWork analyze(ASTNode ast, QueryState queryState, Hive db) throws Exception {
    DDLSemanticAnalyzer analyzer = new DDLSemanticAnalyzer(queryState, db);
    SessionState.start(queryState.getConf());
    analyzer.analyze(ast, new Context(queryState.getConf()));
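    // a DDL statement should compile to exactly one root task, whose work is the DDLWork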
    List<Task<? extends Serializable>> rootTasks = analyzer.getRootTasks();
    return (DDLWork) inList(rootTasks).ofSize(1).get(0).getWork();
}
Also used : Context(org.apache.hadoop.hive.ql.Context) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) DDLSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer)
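A sketch of how a test might invoke this helper, borrowing the ParseDriver pattern from Example 3 below (the GRANT statement and the availability of queryState and db in the enclosing test are assumptions):

ParseDriver pd = new ParseDriver();
ASTNode ast = (ASTNode) pd.parse("GRANT ALL ON TABLE src TO USER user1").getChild(0);
DDLWork work = AuthorizationTestUtil.analyze(ast, queryState, db);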

Example 2 with Task

Use of org.apache.hadoop.hive.ql.exec.Task in project hive by apache.

From the class TestConditionalResolverCommonJoin, method testResolvingDriverAlias:

@Test
public void testResolvingDriverAlias() throws Exception {
    ConditionalResolverCommonJoin resolver = new ConditionalResolverCommonJoin();
    HashMap<Path, ArrayList<String>> pathToAliases = new HashMap<>();
    pathToAliases.put(new Path("path1"), new ArrayList<String>(Arrays.asList("alias1", "alias2")));
    pathToAliases.put(new Path("path2"), new ArrayList<String>(Arrays.asList("alias3")));
    HashMap<String, Long> aliasToKnownSize = new HashMap<String, Long>();
    aliasToKnownSize.put("alias1", 1024l);
    aliasToKnownSize.put("alias2", 2048l);
    aliasToKnownSize.put("alias3", 4096l);
    DDLTask task1 = new DDLTask();
    task1.setId("alias2");
    DDLTask task2 = new DDLTask();
    task2.setId("alias3");
    // joins alias1, alias2, alias3 (alias1 was not eligible for the big-table position)
    // Must be a deterministic-order map for consistent q-test output across Java versions
    HashMap<Task<? extends Serializable>, Set<String>> taskToAliases = new LinkedHashMap<Task<? extends Serializable>, Set<String>>();
    taskToAliases.put(task1, new HashSet<String>(Arrays.asList("alias2")));
    taskToAliases.put(task2, new HashSet<String>(Arrays.asList("alias3")));
    ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx ctx = new ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx();
    ctx.setPathToAliases(pathToAliases);
    ctx.setTaskToAliases(taskToAliases);
    ctx.setAliasToKnownSize(aliasToKnownSize);
    HiveConf conf = new HiveConf();
    conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, 4096);
    // only alias3 can be selected
    Task resolved = resolver.resolveMapJoinTask(ctx, conf);
    Assert.assertEquals("alias3", resolved.getId());
    conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, 65536);
    // alias1, alias2, alias3 all qualify, but the biggest one (alias3) wins
    resolved = resolver.resolveMapJoinTask(ctx, conf);
    Assert.assertEquals("alias3", resolved.getId());
    conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, 2048);
    // no candidate fits under the 2048-byte threshold, so nothing is selected
    resolved = resolver.resolveMapJoinTask(ctx, conf);
    Assert.assertNull(resolved);
}
Also used : Path(org.apache.hadoop.fs.Path) DDLTask(org.apache.hadoop.hive.ql.exec.DDLTask) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Test(org.junit.Test)
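The rule the three assertions exercise can be stated compactly: a candidate big-table alias qualifies when the summed size of all other aliases fits within HIVESMALLTABLESFILESIZE, and among qualifying candidates the largest wins. A minimal standalone sketch of that rule (illustrative only; pickBigTable is a made-up name, and unlike the test it treats every alias as a candidate, which happens not to change the outcomes here):

import java.util.LinkedHashMap;
import java.util.Map;

public class BigTablePickerSketch {
    // returns the chosen big-table alias, or null when no candidate qualifies
    static String pickBigTable(Map<String, Long> aliasToSize, long smallTableThreshold) {
        long total = aliasToSize.values().stream().mapToLong(Long::longValue).sum();
        String best = null;
        long bestSize = -1;
        for (Map.Entry<String, Long> e : aliasToSize.entrySet()) {
            long othersSize = total - e.getValue();
            // all remaining (small) tables must fit under the threshold
            if (othersSize <= smallTableThreshold && e.getValue() > bestSize) {
                best = e.getKey();
                bestSize = e.getValue();
            }
        }
        return best;
    }

    public static void main(String[] args) {
        Map<String, Long> sizes = new LinkedHashMap<>();
        sizes.put("alias1", 1024L);
        sizes.put("alias2", 2048L);
        sizes.put("alias3", 4096L);
        System.out.println(pickBigTable(sizes, 4096L));  // alias3
        System.out.println(pickBigTable(sizes, 65536L)); // alias3 again: biggest qualifying alias
        System.out.println(pickBigTable(sizes, 2048L));  // null: nothing qualifies
    }
}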

Example 3 with Task

Use of org.apache.hadoop.hive.ql.exec.Task in project hive by apache.

From the class TestQBCompact, method parseAndAnalyze:

private AlterTableSimpleDesc parseAndAnalyze(String query) throws Exception {
    ParseDriver hd = new ParseDriver();
    ASTNode head = (ASTNode) hd.parse(query).getChild(0);
    BaseSemanticAnalyzer a = SemanticAnalyzerFactory.get(queryState, head);
    a.analyze(head, new Context(conf));
    List<Task<? extends Serializable>> roots = a.getRootTasks();
    Assert.assertEquals(1, roots.size());
    return ((DDLWork) roots.get(0).getWork()).getAlterTblSimpleDesc();
}
Also used : Context(org.apache.hadoop.hive.ql.Context) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork)
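A hedged usage sketch (the statement text is illustrative, and getCompactionType() is assumed to be the accessor AlterTableSimpleDesc exposes):

AlterTableSimpleDesc desc = parseAndAnalyze("ALTER TABLE foo COMPACT 'major'");
Assert.assertEquals("major", desc.getCompactionType());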

Example 4 with Task

Use of org.apache.hadoop.hive.ql.exec.Task in project hive by apache.

From the class CompactIndexHandler, method generateIndexQuery:

@Override
public void generateIndexQuery(List<Index> indexes, ExprNodeDesc predicate, ParseContext pctx, HiveIndexQueryContext queryContext) {
    Index index = indexes.get(0);
    DecomposedPredicate decomposedPredicate = decomposePredicate(predicate, index, queryContext.getQueryPartitions());
    if (decomposedPredicate == null) {
        queryContext.setQueryTasks(null);
        // abort if we couldn't pull out anything from the predicate
        return;
    }
    // pass residual predicate back out for further processing
    queryContext.setResidualPredicate(decomposedPredicate.residualPredicate);
    // setup TableScanOperator to change input format for original query
    queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());
    // Build reentrant QL for index query
    StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
    String tmpFile = pctx.getContext().getMRTmpPath().toUri().toString();
    queryContext.setIndexIntermediateFile(tmpFile);
    // QL includes " around file name
    qlCommand.append("\"" + tmpFile + "\" ");
    qlCommand.append("SELECT `_bucketname` ,  `_offsets` FROM ");
    qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName()));
    qlCommand.append(" WHERE ");
    String predicateString = decomposedPredicate.pushedPredicate.getExprString();
    qlCommand.append(predicateString);
    // generate tasks from index query string
    LOG.info("Generating tasks for re-entrant QL query: " + qlCommand.toString());
    HiveConf queryConf = new HiveConf(pctx.getConf(), CompactIndexHandler.class);
    HiveConf.setBoolVar(queryConf, HiveConf.ConfVars.COMPRESSRESULT, false);
    Driver driver = new Driver(queryConf);
    driver.compile(qlCommand.toString(), false);
    if (pctx.getConf().getBoolVar(ConfVars.HIVE_INDEX_COMPACT_BINARY_SEARCH) && useSorted) {
        // For now, only works if the predicate is a single condition
        MapWork work = null;
        String originalInputFormat = null;
        for (Task task : driver.getPlan().getRootTasks()) {
            // Otherwise something is wrong, log the problem and continue using the default format
            if (task.getWork() instanceof MapredWork) {
                if (work != null) {
                    LOG.error("Tried to use a binary search on a compact index but there were an " + "unexpected number (>1) of root level map reduce tasks in the " + "reentrant query plan.");
                    work.setInputformat(null);
                    work.setInputFormatSorted(false);
                    break;
                }
                if (task.getWork() != null) {
                    work = ((MapredWork) task.getWork()).getMapWork();
                }
                String inputFormat = work.getInputformat();
                originalInputFormat = inputFormat;
                if (inputFormat == null) {
                    inputFormat = HiveConf.getVar(pctx.getConf(), HiveConf.ConfVars.HIVEINPUTFORMAT);
                }
                // the binary search only works when the input format is HiveInputFormat
                // or a subclass (e.g. BucketizedHiveInputFormat)
                try {
                    if (!HiveInputFormat.class.isAssignableFrom(JavaUtils.loadClass(inputFormat))) {
                        work = null;
                        break;
                    }
                } catch (ClassNotFoundException e) {
                    LOG.error("Map reduce work's input format class: " + inputFormat + " was not found. " + "Cannot use the fact the compact index is sorted.");
                    work = null;
                    break;
                }
                work.setInputFormatSorted(true);
            }
        }
        if (work != null) {
            // Find the filter operator and expr node which act on the index column and mark them
            if (!findIndexColumnFilter(work.getAliasToWork().values())) {
                LOG.error("Could not locate the index column's filter operator and expr node. Cannot " + "use the fact the compact index is sorted.");
                work.setInputformat(originalInputFormat);
                work.setInputFormatSorted(false);
            }
        }
    }
    queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
    queryContext.setQueryTasks(driver.getPlan().getRootTasks());
}
Also used : DecomposedPredicate(org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) Task(org.apache.hadoop.hive.ql.exec.Task) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) Driver(org.apache.hadoop.hive.ql.Driver) Index(org.apache.hadoop.hive.metastore.api.Index) HiveConf(org.apache.hadoop.hive.conf.HiveConf)
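To make the string-building concrete: if the pushed predicate were (key = 100) and the index table were named default__src_src_index__ (both illustrative), the reentrant query handed to the Driver would read

INSERT OVERWRITE DIRECTORY "hdfs://namenode:8020/tmp/hive/_tmp_mr_0" SELECT `_bucketname` ,  `_offsets` FROM default__src_src_index__ WHERE (key = 100)

where the quoted directory is whatever getMRTmpPath() returned (the path shown is made up).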

Example 5 with Task

Use of org.apache.hadoop.hive.ql.exec.Task in project hive by apache.

From the class GenMapRedUtils, method joinUnionPlan:

/*
   * Join the current union task into an existing (old) task: swap existingTask in
   * for currentUnionTask in the root-task list and re-point all of
   * currentUnionTask's parent tasks at existingTask.
   */
public static void joinUnionPlan(GenMRProcContext opProcCtx, UnionOperator currUnionOp, Task<? extends Serializable> currentUnionTask, Task<? extends Serializable> existingTask, boolean local) throws SemanticException {
    assert currUnionOp != null;
    GenMRUnionCtx uCtx = opProcCtx.getUnionTask(currUnionOp);
    assert uCtx != null;
    setUnionPlan(opProcCtx, local, existingTask, uCtx, true);
    List<Task<? extends Serializable>> parTasks = null;
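    // if currentUnionTask was a root task, swap existingTask in for it (only when existingTask has no parents of its own)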
    if (opProcCtx.getRootTasks().contains(currentUnionTask)) {
        opProcCtx.getRootTasks().remove(currentUnionTask);
        if (!opProcCtx.getRootTasks().contains(existingTask) && (existingTask.getParentTasks() == null || existingTask.getParentTasks().isEmpty())) {
            opProcCtx.getRootTasks().add(existingTask);
        }
    }
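    // detach currentUnionTask from all of its parent tasks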
    if ((currentUnionTask != null) && (currentUnionTask.getParentTasks() != null) && !currentUnionTask.getParentTasks().isEmpty()) {
        parTasks = new ArrayList<Task<? extends Serializable>>();
        parTasks.addAll(currentUnionTask.getParentTasks());
        Object[] parTaskArr = parTasks.toArray();
        for (Object parTask : parTaskArr) {
            ((Task<? extends Serializable>) parTask).removeDependentTask(currentUnionTask);
        }
    }
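    // re-point those parents at existingTask, which then stops being a root task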
    if ((currentUnionTask != null) && (parTasks != null)) {
        for (Task<? extends Serializable> parTask : parTasks) {
            parTask.addDependentTask(existingTask);
            if (opProcCtx.getRootTasks().contains(existingTask)) {
                opProcCtx.getRootTasks().remove(existingTask);
            }
        }
    }
    opProcCtx.setCurrTask(existingTask);
}
Also used : SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask) Serializable(java.io.Serializable) GenMRUnionCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx)
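The re-wiring this performs is easiest to see in isolation with a concrete Task subclass; a minimal sketch using DDLTask stand-ins as in Example 2 (illustrative only, not planner code):

Task<? extends Serializable> parent = new DDLTask();
Task<? extends Serializable> unionTask = new DDLTask();
Task<? extends Serializable> existingTask = new DDLTask();
// start with parent -> unionTask
parent.addDependentTask(unionTask);
// joinUnionPlan detaches the union task from each of its parents...
parent.removeDependentTask(unionTask);
// ...and makes existingTask the dependent task in its place
parent.addDependentTask(existingTask);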

Aggregations

Task (org.apache.hadoop.hive.ql.exec.Task): 50
Serializable (java.io.Serializable): 40
ArrayList (java.util.ArrayList): 21
ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask): 20
Path (org.apache.hadoop.fs.Path): 18
Operator (org.apache.hadoop.hive.ql.exec.Operator): 16
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 15
MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask): 15
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 15
SparkTask (org.apache.hadoop.hive.ql.exec.spark.SparkTask): 12
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 12
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 11
FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask): 10
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 10
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 9
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 9
List (java.util.List): 8
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 8
IOException (java.io.IOException): 7