Example use of org.apache.hadoop.hive.ql.exec.tez.TezTask in the Apache Hive project.
From the class TestUtilities, method testGetTasksHaveNoRepeats.
/**
 * Verifies that the Utilities.get*Tasks helpers do not return or visit the same task
 * more than once when extracting tasks from a set of root tasks whose DAG offers
 * multiple paths to the same node, such as the diamond-shaped DAGs common to replication.
 */
@Test
public void testGetTasksHaveNoRepeats() {
  CountingWrappingTask mrTask = new CountingWrappingTask(new ExecDriver());
  CountingWrappingTask tezTask = new CountingWrappingTask(new TezTask());
  CountingWrappingTask sparkTask = new CountingWrappingTask(new SparkTask());

  // First check - we should not have repeats in results.
  assertEquals("No repeated MRTasks from Utilities.getMRTasks", 1,
      Utilities.getMRTasks(getTestDiamondTaskGraph(mrTask)).size());
  assertEquals("No repeated TezTasks from Utilities.getTezTasks", 1,
      Utilities.getTezTasks(getTestDiamondTaskGraph(tezTask)).size());
  // The failure message previously said "TezTasks" for the Spark check (copy-paste slip).
  assertEquals("No repeated SparkTasks from Utilities.getSparkTasks", 1,
      Utilities.getSparkTasks(getTestDiamondTaskGraph(sparkTask)).size());

  // Second check - the tasks we looked for must not have been accessed more than
  // once as a result of the traversal. Note that the expected call count is 2, not 1,
  // because each visit counts twice: once to check for existence, and once to visit.
  assertEquals("MRTasks should have been visited only once", 2, mrTask.getDepCallCount());
  assertEquals("TezTasks should have been visited only once", 2, tezTask.getDepCallCount());
  assertEquals("SparkTasks should have been visited only once", 2, sparkTask.getDepCallCount());
}
Example use of org.apache.hadoop.hive.ql.exec.tez.TezTask in the Apache Hive project.
From the class GenericUDTFGetSplits, method createPlanFragment (variant taking an ApplicationId).
/**
 * Compiles {@code query} into a single-TezTask plan and packages its work, result schema
 * and job configuration into a {@link PlanFragment}.
 *
 * <p>The query is compiled against a private copy of the session configuration with the
 * execution mode forced to "llap", fetch-task conversion disabled, consistent splits
 * enabled and the RPC query plan turned on. If the compiled Tez work does not consist of
 * exactly one unit of work, the query is first materialized into a temporary table (run
 * under the original execution mode) and a {@code select *} over that table is compiled
 * instead. Otherwise locks are acquired for the compiled query and the driver cleanup is
 * handed over to the session, so it is not closed here.
 *
 * @param query the query text to compile
 * @param num not used by this method
 * @param splitsAppId application id; its string form is passed to the {@code DriverCleanup}
 * @return the plan fragment built from the Tez work, the converted schema and {@code jc}
 * @throws HiveException if configuration creation, compilation, materialization or lock
 *         acquisition fails, or if the plan is not a single {@link TezTask}
 */
public PlanFragment createPlanFragment(String query, int num, ApplicationId splitsAppId) throws HiveException {
  HiveConf conf = new HiveConf(SessionState.get().getConf());
  HiveConf.setVar(conf, ConfVars.HIVEFETCHTASKCONVERSION, "none");
  HiveConf.setVar(conf, ConfVars.HIVEQUERYRESULTFILEFORMAT, PlanUtils.LLAP_OUTPUT_FORMAT_KEY);
  // Remembered so the CTAS materialization below can run under the caller's mode.
  String originalMode = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_MODE);
  HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
  HiveConf.setBoolVar(conf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS, true);
  HiveConf.setBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS, true);
  conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_NODE_LOCAL_ONLY, true);
  // Tez/LLAP requires RPC query plan
  HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
  try {
    // NOTE(review): jc is not declared in this method - presumably an instance field.
    jc = DagUtils.getInstance().createConfiguration(conf);
  } catch (IOException e) {
    throw new HiveException(e);
  }

  // Instantiate Driver to compile the query passed in.
  // This UDF is running as part of an existing query, which may already be using the
  // SessionState TxnManager. If this new Driver also tries to use the same TxnManager
  // then this may mess up the existing state of the TxnManager.
  // So initialize the new Driver with a new TxnManager so that it does not use the
  // Session TxnManager that is already in use.
  HiveTxnManager txnManager = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
  Driver driver = new Driver(new QueryState.Builder().withHiveConf(conf).nonIsolated().build(), null, null, txnManager);
  DriverCleanup driverCleanup = new DriverCleanup(driver, txnManager, splitsAppId.toString());
  // Stays true unless ownership of driverCleanup is transferred to the session below.
  boolean needsCleanup = true;
  try {
    CommandProcessorResponse cpr = driver.compileAndRespond(query);
    if (cpr.getResponseCode() != 0) {
      throw new HiveException("Failed to compile query: " + cpr.getException());
    }
    QueryPlan plan = driver.getPlan();
    List<Task<?>> roots = plan.getRootTasks();
    Schema schema = convertSchema(plan.getResultSchema());
    if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
      throw new HiveException("Was expecting a single TezTask.");
    }
    TezWork tezWork = ((TezTask) roots.get(0)).getWork();
    if (tezWork.getAllWork().size() != 1) {
      // Strip the dashes from the UUID so the generated name is a plain identifier.
      String tableName = "table_" + UUID.randomUUID().toString().replaceAll("[^A-Za-z0-9 ]", "");
      String ctas = "create temporary table " + tableName + " as " + query;
      LOG.info("Materializing the query for LLAPIF; CTAS: " + ctas);
      driver.releaseResources();
      // Run the materialization under the original mode, then switch back to llap.
      HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, originalMode);
      cpr = driver.run(ctas, false);
      if (cpr.getResponseCode() != 0) {
        throw new HiveException("Failed to create temp table: " + cpr.getException());
      }
      HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
      query = "select * from " + tableName;
      cpr = driver.compileAndRespond(query);
      if (cpr.getResponseCode() != 0) {
        // This step compiles the select over the temp table; the table already exists.
        throw new HiveException("Failed to compile query: " + cpr.getException());
      }
      plan = driver.getPlan();
      roots = plan.getRootTasks();
      schema = convertSchema(plan.getResultSchema());
      if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
        throw new HiveException("Was expecting a single TezTask.");
      }
      tezWork = ((TezTask) roots.get(0)).getWork();
    } else {
      // The read will have READ_COMMITTED level semantics.
      try {
        driver.lockAndRespond();
      } catch (CommandProcessorResponse cpr1) {
        throw new HiveException("Failed to acquire locks", cpr1);
      }
      // Attach the resources to the session cleanup.
      SessionState.get().addCleanupItem(driverCleanup);
      needsCleanup = false;
    }
    // Pass the ValidTxnList and ValidTxnWriteIdList snapshot configurations corresponding to the input query
    HiveConf driverConf = driver.getConf();
    String validTxnString = driverConf.get(ValidTxnList.VALID_TXNS_KEY);
    if (validTxnString != null) {
      jc.set(ValidTxnList.VALID_TXNS_KEY, validTxnString);
    }
    String validWriteIdString = driverConf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
    if (validWriteIdString != null) {
      jc.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY, validWriteIdString);
    }
    return new PlanFragment(tezWork, schema, jc);
  } finally {
    if (needsCleanup) {
      // driverCleanup is always non-null here (assigned from `new` above), so the former
      // fallback that closed the driver directly was unreachable and has been removed.
      try {
        driverCleanup.close();
      } catch (IOException err) {
        throw new HiveException(err);
      }
    }
  }
}
Example use of org.apache.hadoop.hive.ql.exec.tez.TezTask in the Apache Hive project.
From the class GenericUDTFGetSplits, method createPlanFragment (two-argument variant).
/**
 * Compiles {@code query} into a single-TezTask plan and packages its work, result schema
 * and job configuration into a {@link PlanFragment}.
 *
 * <p>The query is compiled against a private copy of the session configuration with the
 * execution mode forced to "llap", fetch-task conversion disabled, consistent splits
 * enabled and the RPC query plan turned on. If the compiled Tez work does not consist of
 * exactly one unit of work, the query is first materialized into a temporary table (run
 * under the original execution mode) and a {@code select *} over that table is compiled
 * instead. The driver is always closed and destroyed before returning.
 *
 * @param query the query text to compile
 * @param num not used by this method
 * @return the plan fragment built from the Tez work, the converted schema and {@code jc}
 * @throws HiveException if configuration creation, compilation or materialization fails,
 *         or if the plan is not a single {@link TezTask}
 */
public PlanFragment createPlanFragment(String query, int num) throws HiveException {
  HiveConf conf = new HiveConf(SessionState.get().getConf());
  HiveConf.setVar(conf, ConfVars.HIVEFETCHTASKCONVERSION, "none");
  HiveConf.setVar(conf, ConfVars.HIVEQUERYRESULTFILEFORMAT, PlanUtils.LLAP_OUTPUT_FORMAT_KEY);
  // Remembered so the CTAS materialization below can run under the caller's mode.
  String originalMode = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_MODE);
  HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
  HiveConf.setBoolVar(conf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS, true);
  HiveConf.setBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS, true);
  conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_NODE_LOCAL_ONLY, true);
  // Tez/LLAP requires RPC query plan
  HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
  try {
    // NOTE(review): jc is not declared in this method - presumably an instance field.
    jc = DagUtils.getInstance().createConfiguration(conf);
  } catch (IOException e) {
    throw new HiveException(e);
  }

  Driver driver = new Driver(conf);
  try {
    CommandProcessorResponse cpr = driver.compileAndRespond(query);
    if (cpr.getResponseCode() != 0) {
      throw new HiveException("Failed to compile query: " + cpr.getException());
    }
    QueryPlan plan = driver.getPlan();
    List<Task<?>> roots = plan.getRootTasks();
    Schema schema = convertSchema(plan.getResultSchema());
    if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
      throw new HiveException("Was expecting a single TezTask.");
    }
    TezWork tezWork = ((TezTask) roots.get(0)).getWork();
    if (tezWork.getAllWork().size() != 1) {
      // Strip the dashes from the UUID so the generated name is a plain identifier.
      String tableName = "table_" + UUID.randomUUID().toString().replaceAll("[^A-Za-z0-9 ]", "");
      String ctas = "create temporary table " + tableName + " as " + query;
      LOG.info("Materializing the query for LLAPIF; CTAS: " + ctas);
      try {
        driver.resetQueryState();
        // Run the materialization under the original mode, then switch back to llap.
        HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, originalMode);
        cpr = driver.run(ctas, false);
      } catch (CommandNeedRetryException e) {
        throw new HiveException(e);
      }
      if (cpr.getResponseCode() != 0) {
        throw new HiveException("Failed to create temp table: " + cpr.getException());
      }
      HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
      query = "select * from " + tableName;
      cpr = driver.compileAndRespond(query);
      if (cpr.getResponseCode() != 0) {
        // This step compiles the select over the temp table; the table already exists.
        throw new HiveException("Failed to compile query: " + cpr.getException());
      }
      plan = driver.getPlan();
      roots = plan.getRootTasks();
      schema = convertSchema(plan.getResultSchema());
      if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
        throw new HiveException("Was expecting a single TezTask.");
      }
      tezWork = ((TezTask) roots.get(0)).getWork();
    }
    return new PlanFragment(tezWork, schema, jc);
  } finally {
    // Release the driver on both the success and the failure path.
    driver.close();
    driver.destroy();
  }
}
Example use of org.apache.hadoop.hive.ql.exec.tez.TezTask in the Apache Hive project.
From the class ATSHook, method getExecutionMode.
/**
 * Classifies the execution mode of a query plan from the kinds of tasks it contains.
 *
 * <p>Returns NONE when the plan has no MR, Spark or Tez tasks; SPARK when it has any
 * Spark task; LLAP when it has Tez tasks and at least one of them reports LLAP mode on
 * its work; TEZ when it has Tez tasks and none do; and MR otherwise.
 *
 * @param plan the query plan whose root tasks are inspected
 * @return the execution mode derived from the plan's tasks
 */
protected ExecutionMode getExecutionMode(QueryPlan plan) {
  final int mrCount = Utilities.getMRTasks(plan.getRootTasks()).size();
  final int sparkCount = Utilities.getSparkTasks(plan.getRootTasks()).size();
  final int tezCount = Utilities.getTezTasks(plan.getRootTasks()).size();

  if (mrCount + sparkCount + tezCount == 0) {
    return ExecutionMode.NONE;
  }
  if (sparkCount > 0) {
    return ExecutionMode.SPARK;
  }
  if (tezCount <= 0) {
    // Only MR tasks remain at this point.
    return ExecutionMode.MR;
  }

  // Tez plan: report LLAP as soon as any task's work is flagged as running in LLAP mode.
  for (TezTask candidate : Utilities.getTezTasks(plan.getRootTasks())) {
    if (candidate.getWork().getLlapMode()) {
      return ExecutionMode.LLAP;
    }
  }
  return ExecutionMode.TEZ;
}
Example use of org.apache.hadoop.hive.ql.exec.tez.TezTask in the Apache Hive project.
From the class PostExecOrcRowGroupCountPrinter, method run.
/**
 * Post-execution hook that prints the selected-row-groups LLAP IO counter for every
 * Tez task found in the query plan.
 *
 * <p>Does nothing when the configured execution engine is not "tez" or when the hook
 * context carries no query plan. Tasks without counters are skipped.
 *
 * @param hookContext the hook context supplying the configuration and query plan
 * @throws Exception declared by the hook interface
 */
@Override
public void run(HookContext hookContext) throws Exception {
  assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);

  final HiveConf hiveConf = hookContext.getConf();
  final String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
  if (!"tez".equals(engine)) {
    return;
  }

  LOG.info("Executing post execution hook to print ORC row groups read counter..");
  final SessionState session = SessionState.get();
  final SessionState.LogHelper out = session.getConsole();
  final QueryPlan queryPlan = hookContext.getQueryPlan();
  if (queryPlan == null) {
    return;
  }

  final List<TezTask> tezTasks = Utilities.getTezTasks(queryPlan.getRootTasks());
  for (TezTask task : tezTasks) {
    LOG.info("Printing ORC row group counter for tez task: " + task.getName());
    final TezCounters taskCounters = task.getTezCounters();
    if (taskCounters == null) {
      continue;
    }
    for (CounterGroup counterGroup : taskCounters) {
      // Only the LLAP IO counter group is of interest.
      if (!counterGroup.getName().equals(LlapIOCounters.class.getName())) {
        continue;
      }
      out.printInfo(task.getId() + " LLAP IO COUNTERS:", false);
      for (TezCounter entry : counterGroup) {
        if (!entry.getDisplayName().equals(LlapIOCounters.SELECTED_ROWGROUPS.name())) {
          continue;
        }
        out.printInfo(" " + entry.getDisplayName() + ": " + entry.getValue(), false);
      }
    }
  }
}
Aggregations