Search in sources :

Example 6 with TezTask

use of org.apache.hadoop.hive.ql.exec.tez.TezTask in project hive by apache.

the class TestUtilities method testGetTasksHaveNoRepeats.

/**
 * This test tests that Utilities.get*Tasks do not repeat themselves in the process
 * of extracting tasks from a given set of root tasks when given DAGs that can have
 * multiple paths, such as the case with Diamond-shaped DAGs common to replication.
 */
@Test
public void testGetTasksHaveNoRepeats() {
    CountingWrappingTask mrTask = new CountingWrappingTask(new ExecDriver());
    CountingWrappingTask tezTask = new CountingWrappingTask(new TezTask());
    CountingWrappingTask sparkTask = new CountingWrappingTask(new SparkTask());
    // First check - we should not have repeats in results
    assertEquals("No repeated MRTasks from Utilities.getMRTasks", 1, Utilities.getMRTasks(getTestDiamondTaskGraph(mrTask)).size());
    assertEquals("No repeated TezTasks from Utilities.getTezTasks", 1, Utilities.getTezTasks(getTestDiamondTaskGraph(tezTask)).size());
    assertEquals("No repeated TezTasks from Utilities.getSparkTasks", 1, Utilities.getSparkTasks(getTestDiamondTaskGraph(sparkTask)).size());
    // Second check - the tasks we looked for must not have been accessed more than
    // once as a result of the traversal (note that we actually wind up accessing
    // 2 times , because each visit counts twice, once to check for existence, and
    // once to visit.
    assertEquals("MRTasks should have been visited only once", 2, mrTask.getDepCallCount());
    assertEquals("TezTasks should have been visited only once", 2, tezTask.getDepCallCount());
    assertEquals("SparkTasks should have been visited only once", 2, sparkTask.getDepCallCount());
}
Also used : SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) Test(org.junit.Test)

Example 7 with TezTask

use of org.apache.hadoop.hive.ql.exec.tez.TezTask in project hive by apache.

the class GenericUDTFGetSplits method createPlanFragment.

public PlanFragment createPlanFragment(String query, int num, ApplicationId splitsAppId) throws HiveException {
    HiveConf conf = new HiveConf(SessionState.get().getConf());
    HiveConf.setVar(conf, ConfVars.HIVEFETCHTASKCONVERSION, "none");
    HiveConf.setVar(conf, ConfVars.HIVEQUERYRESULTFILEFORMAT, PlanUtils.LLAP_OUTPUT_FORMAT_KEY);
    String originalMode = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_MODE);
    HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
    HiveConf.setBoolVar(conf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS, true);
    HiveConf.setBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS, true);
    conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_NODE_LOCAL_ONLY, true);
    // Tez/LLAP requires RPC query plan
    HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
    try {
        jc = DagUtils.getInstance().createConfiguration(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    // Instantiate Driver to compile the query passed in.
    // This UDF is running as part of an existing query, which may already be using the
    // SessionState TxnManager. If this new Driver also tries to use the same TxnManager
    // then this may mess up the existing state of the TxnManager.
    // So initialize the new Driver with a new TxnManager so that it does not use the
    // Session TxnManager that is already in use.
    HiveTxnManager txnManager = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
    Driver driver = new Driver(new QueryState.Builder().withHiveConf(conf).nonIsolated().build(), null, null, txnManager);
    DriverCleanup driverCleanup = new DriverCleanup(driver, txnManager, splitsAppId.toString());
    boolean needsCleanup = true;
    try {
        CommandProcessorResponse cpr = driver.compileAndRespond(query);
        if (cpr.getResponseCode() != 0) {
            throw new HiveException("Failed to compile query: " + cpr.getException());
        }
        QueryPlan plan = driver.getPlan();
        List<Task<?>> roots = plan.getRootTasks();
        Schema schema = convertSchema(plan.getResultSchema());
        if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
            throw new HiveException("Was expecting a single TezTask.");
        }
        TezWork tezWork = ((TezTask) roots.get(0)).getWork();
        if (tezWork.getAllWork().size() != 1) {
            String tableName = "table_" + UUID.randomUUID().toString().replaceAll("[^A-Za-z0-9 ]", "");
            String ctas = "create temporary table " + tableName + " as " + query;
            LOG.info("Materializing the query for LLAPIF; CTAS: " + ctas);
            driver.releaseResources();
            HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, originalMode);
            cpr = driver.run(ctas, false);
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
            query = "select * from " + tableName;
            cpr = driver.compileAndRespond(query);
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            plan = driver.getPlan();
            roots = plan.getRootTasks();
            schema = convertSchema(plan.getResultSchema());
            if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
                throw new HiveException("Was expecting a single TezTask.");
            }
            tezWork = ((TezTask) roots.get(0)).getWork();
        } else {
            // The read will have READ_COMMITTED level semantics.
            try {
                driver.lockAndRespond();
            } catch (CommandProcessorResponse cpr1) {
                throw new HiveException("Failed to acquire locks", cpr1);
            }
            // Attach the resources to the session cleanup.
            SessionState.get().addCleanupItem(driverCleanup);
            needsCleanup = false;
        }
        // Pass the ValidTxnList and ValidTxnWriteIdList snapshot configurations corresponding to the input query
        HiveConf driverConf = driver.getConf();
        String validTxnString = driverConf.get(ValidTxnList.VALID_TXNS_KEY);
        if (validTxnString != null) {
            jc.set(ValidTxnList.VALID_TXNS_KEY, validTxnString);
        }
        String validWriteIdString = driverConf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
        if (validWriteIdString != null) {
            jc.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY, validWriteIdString);
        }
        return new PlanFragment(tezWork, schema, jc);
    } finally {
        if (needsCleanup) {
            if (driverCleanup != null) {
                try {
                    driverCleanup.close();
                } catch (IOException err) {
                    throw new HiveException(err);
                }
            } else if (driver != null) {
                driver.close();
                driver.destroy();
            }
        }
    }
}
Also used : TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) Task(org.apache.hadoop.hive.ql.exec.Task) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) TaskSpecBuilder(org.apache.tez.dag.api.TaskSpecBuilder) CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Schema(org.apache.hadoop.hive.llap.Schema) Driver(org.apache.hadoop.hive.ql.Driver) IOException(java.io.IOException) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)

Example 8 with TezTask

use of org.apache.hadoop.hive.ql.exec.tez.TezTask in project hive by apache.

the class GenericUDTFGetSplits method createPlanFragment.

public PlanFragment createPlanFragment(String query, int num) throws HiveException {
    HiveConf conf = new HiveConf(SessionState.get().getConf());
    HiveConf.setVar(conf, ConfVars.HIVEFETCHTASKCONVERSION, "none");
    HiveConf.setVar(conf, ConfVars.HIVEQUERYRESULTFILEFORMAT, PlanUtils.LLAP_OUTPUT_FORMAT_KEY);
    String originalMode = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_MODE);
    HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
    HiveConf.setBoolVar(conf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS, true);
    HiveConf.setBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS, true);
    conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_NODE_LOCAL_ONLY, true);
    // Tez/LLAP requires RPC query plan
    HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
    try {
        jc = DagUtils.getInstance().createConfiguration(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    Driver driver = new Driver(conf);
    try {
        CommandProcessorResponse cpr = driver.compileAndRespond(query);
        if (cpr.getResponseCode() != 0) {
            throw new HiveException("Failed to compile query: " + cpr.getException());
        }
        QueryPlan plan = driver.getPlan();
        List<Task<?>> roots = plan.getRootTasks();
        Schema schema = convertSchema(plan.getResultSchema());
        if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
            throw new HiveException("Was expecting a single TezTask.");
        }
        TezWork tezWork = ((TezTask) roots.get(0)).getWork();
        if (tezWork.getAllWork().size() != 1) {
            String tableName = "table_" + UUID.randomUUID().toString().replaceAll("[^A-Za-z0-9 ]", "");
            String ctas = "create temporary table " + tableName + " as " + query;
            LOG.info("Materializing the query for LLAPIF; CTAS: " + ctas);
            try {
                driver.resetQueryState();
                HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, originalMode);
                cpr = driver.run(ctas, false);
            } catch (CommandNeedRetryException e) {
                throw new HiveException(e);
            }
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
            query = "select * from " + tableName;
            cpr = driver.compileAndRespond(query);
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            plan = driver.getPlan();
            roots = plan.getRootTasks();
            schema = convertSchema(plan.getResultSchema());
            if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
                throw new HiveException("Was expecting a single TezTask.");
            }
            tezWork = ((TezTask) roots.get(0)).getWork();
        }
        return new PlanFragment(tezWork, schema, jc);
    } finally {
        driver.close();
        driver.destroy();
    }
}
Also used : TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) Task(org.apache.hadoop.hive.ql.exec.Task) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Schema(org.apache.hadoop.hive.llap.Schema) Driver(org.apache.hadoop.hive.ql.Driver) IOException(java.io.IOException) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) CommandNeedRetryException(org.apache.hadoop.hive.ql.CommandNeedRetryException) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)

Example 9 with TezTask

use of org.apache.hadoop.hive.ql.exec.tez.TezTask in project hive by apache.

the class ATSHook method getExecutionMode.

protected ExecutionMode getExecutionMode(QueryPlan plan) {
    int numMRJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
    int numSparkJobs = Utilities.getSparkTasks(plan.getRootTasks()).size();
    int numTezJobs = Utilities.getTezTasks(plan.getRootTasks()).size();
    ExecutionMode mode = ExecutionMode.MR;
    if (0 == (numMRJobs + numSparkJobs + numTezJobs)) {
        mode = ExecutionMode.NONE;
    } else if (numSparkJobs > 0) {
        return ExecutionMode.SPARK;
    } else if (numTezJobs > 0) {
        mode = ExecutionMode.TEZ;
        // Need to go in and check if any of the tasks is running in LLAP mode.
        for (TezTask tezTask : Utilities.getTezTasks(plan.getRootTasks())) {
            if (tezTask.getWork().getLlapMode()) {
                mode = ExecutionMode.LLAP;
                break;
            }
        }
    }
    return mode;
}
Also used : TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask)

Example 10 with TezTask

use of org.apache.hadoop.hive.ql.exec.tez.TezTask in project hive by apache.

the class PostExecOrcRowGroupCountPrinter method run.

@Override
public void run(HookContext hookContext) throws Exception {
    assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
    HiveConf conf = hookContext.getConf();
    if (!"tez".equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE))) {
        return;
    }
    LOG.info("Executing post execution hook to print ORC row groups read counter..");
    SessionState ss = SessionState.get();
    SessionState.LogHelper console = ss.getConsole();
    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
        return;
    }
    List<TezTask> rootTasks = Utilities.getTezTasks(plan.getRootTasks());
    for (TezTask tezTask : rootTasks) {
        LOG.info("Printing ORC row group counter for tez task: " + tezTask.getName());
        TezCounters counters = tezTask.getTezCounters();
        if (counters != null) {
            for (CounterGroup group : counters) {
                if (group.getName().equals(LlapIOCounters.class.getName())) {
                    console.printInfo(tezTask.getId() + " LLAP IO COUNTERS:", false);
                    for (TezCounter counter : group) {
                        if (counter.getDisplayName().equals(LlapIOCounters.SELECTED_ROWGROUPS.name())) {
                            console.printInfo("   " + counter.getDisplayName() + ": " + counter.getValue(), false);
                        }
                    }
                }
            }
        }
    }
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) LlapIOCounters(org.apache.hadoop.hive.llap.counters.LlapIOCounters) CounterGroup(org.apache.tez.common.counters.CounterGroup) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TezCounter(org.apache.tez.common.counters.TezCounter) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) TezCounters(org.apache.tez.common.counters.TezCounters)

Aggregations

TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask)13 HiveConf (org.apache.hadoop.hive.conf.HiveConf)6 QueryPlan (org.apache.hadoop.hive.ql.QueryPlan)6 Task (org.apache.hadoop.hive.ql.exec.Task)5 TezWork (org.apache.hadoop.hive.ql.plan.TezWork)5 ArrayList (java.util.ArrayList)4 List (java.util.List)4 Serializable (java.io.Serializable)3 ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask)3 IOException (java.io.IOException)2 Schema (org.apache.hadoop.hive.llap.Schema)2 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)2 Driver (org.apache.hadoop.hive.ql.Driver)2 MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)2 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)2 SessionState (org.apache.hadoop.hive.ql.session.SessionState)2 TezCounter (org.apache.tez.common.counters.TezCounter)2 TezCounters (org.apache.tez.common.counters.TezCounters)2