use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class TestHiveProtoLoggingHook method testQueueLogs.
/**
 * Verifies that the proto logging hook records the execution mode and the
 * queue name for each of the three task flavours exercised here: an MR task,
 * a Tez task, and the same Tez task after its map work is switched to LLAP mode.
 *
 * One event is logged per {@code handle} call and the events are then read
 * back in the same order they were emitted.
 */
@Test
public void testQueueLogs() throws Exception {
  context.setHookType(HookType.PRE_EXEC_HOOK);
  EventLogger evtLogger = new EventLogger(conf, SystemClock.getInstance());

  // This makes it MR task
  context.getQueryPlan().getRootTasks().add(new ExecDriver());
  evtLogger.handle(context);

  // This makes it Tez task
  MapWork mapWork = new MapWork();
  TezWork tezWork = new TezWork("test_queryid");
  tezWork.add(mapWork);
  TezTask task = new TezTask();
  task.setId("id1");
  task.setWork(tezWork);
  context.getQueryPlan().getRootTasks().add(task);
  context.getQueryPlan().getRootTasks().add(new TezTask());
  evtLogger.handle(context);

  // This makes it llap task
  mapWork.setLlapMode(true);
  evtLogger.handle(context);
  evtLogger.shutdown();

  ProtoMessageReader<HiveHookEventProto> reader = getTestReader(conf, tmpFolder);

  // assertEquals takes (expected, actual); keeping that order makes a failure
  // message report the expected and the observed value the right way round.
  HiveHookEventProto event = reader.readEvent();
  Assert.assertNotNull(event);
  Assert.assertEquals(ExecutionMode.MR.name(), event.getExecutionMode());
  Assert.assertEquals("mr_queue", event.getQueue());

  event = reader.readEvent();
  Assert.assertNotNull(event);
  Assert.assertEquals(ExecutionMode.TEZ.name(), event.getExecutionMode());
  Assert.assertEquals("tez_queue", event.getQueue());

  event = reader.readEvent();
  Assert.assertNotNull(event);
  Assert.assertEquals(ExecutionMode.LLAP.name(), event.getExecutionMode());
  Assert.assertEquals("llap_queue", event.getQueue());
}
use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class GenericUDTFGetSplits method createPlanFragment.
/**
 * Compiles {@code query} with LLAP-oriented settings and returns the resulting
 * Tez work together with the result schema and the job configuration.
 *
 * The session configuration is copied, so the settings forced below do not
 * modify the session's own {@code HiveConf}. If the compiled plan's Tez work
 * does not consist of exactly one unit of work, the query is first
 * materialized into a temporary table (run with the originally configured
 * execution mode) and a {@code select *} over that table is compiled instead.
 *
 * @param query the query text to compile
 * @param num not referenced by this method body
 * @return a fragment wrapping the Tez work, the converted result schema and
 *         the configuration stored in {@code jc}
 * @throws HiveException if the configuration cannot be created, compilation
 *         or the CTAS fails, or the plan is not a single {@code TezTask}
 */
public PlanFragment createPlanFragment(String query, int num) throws HiveException {
  HiveConf conf = new HiveConf(SessionState.get().getConf());
  HiveConf.setVar(conf, ConfVars.HIVEFETCHTASKCONVERSION, "none");
  HiveConf.setVar(conf, ConfVars.HIVEQUERYRESULTFILEFORMAT, PlanUtils.LLAP_OUTPUT_FORMAT_KEY);
  // Remember the configured mode so the CTAS fallback below can run with it.
  String originalMode = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_MODE);
  HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
  HiveConf.setBoolVar(conf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS, true);
  HiveConf.setBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS, true);
  conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_NODE_LOCAL_ONLY, true);
  // Tez/LLAP requires RPC query plan
  HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
  try {
    jc = DagUtils.getInstance().createConfiguration(conf);
  } catch (IOException e) {
    throw new HiveException(e);
  }
  Driver driver = new Driver(conf);
  try {
    CommandProcessorResponse cpr = driver.compileAndRespond(query);
    if (cpr.getResponseCode() != 0) {
      throw new HiveException("Failed to compile query: " + cpr.getException());
    }
    QueryPlan plan = driver.getPlan();
    List<Task<?>> roots = plan.getRootTasks();
    Schema schema = convertSchema(plan.getResultSchema());
    if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
      throw new HiveException("Was expecting a single TezTask.");
    }
    TezWork tezWork = ((TezTask) roots.get(0)).getWork();
    if (tezWork.getAllWork().size() != 1) {
      // The plan has more than one unit of work: materialize the query into a
      // temporary table and plan a plain scan over that table instead.
      String tableName = "table_" + UUID.randomUUID().toString().replaceAll("[^A-Za-z0-9 ]", "");
      String ctas = "create temporary table " + tableName + " as " + query;
      LOG.info("Materializing the query for LLAPIF; CTAS: " + ctas);
      try {
        driver.resetQueryState();
        // Run the CTAS with the execution mode that was configured originally.
        HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, originalMode);
        cpr = driver.run(ctas, false);
      } catch (CommandNeedRetryException e) {
        throw new HiveException(e);
      }
      if (cpr.getResponseCode() != 0) {
        throw new HiveException("Failed to create temp table: " + cpr.getException());
      }
      HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
      query = "select * from " + tableName;
      cpr = driver.compileAndRespond(query);
      if (cpr.getResponseCode() != 0) {
        // This step compiles the follow-up select, so report a compile failure
        // rather than repeating the temp-table message used for the CTAS above.
        throw new HiveException("Failed to compile query: " + cpr.getException());
      }
      plan = driver.getPlan();
      roots = plan.getRootTasks();
      schema = convertSchema(plan.getResultSchema());
      if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
        throw new HiveException("Was expecting a single TezTask.");
      }
      tezWork = ((TezTask) roots.get(0)).getWork();
    }
    return new PlanFragment(tezWork, schema, jc);
  } finally {
    // Always release the driver, on success and on failure alike.
    driver.close();
    driver.destroy();
  }
}
use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class DDLTask method mergeFiles.
/**
 * Builds a file-merge work item from the given descriptor, wraps it in a task
 * (a {@code TezTask} when the execution engine is "tez", otherwise a
 * {@code MergeFileTask}), runs that task and returns its result.
 *
 * Any exception recorded on the executed task is copied onto this task.
 *
 * @param db supplies the configuration passed to {@code resolveConcatenateMerge}
 * @param mergeFilesDesc describes the input/output directories, input format,
 *        table descriptor and list-bucketing context of the merge
 * @param driverContext supplies the compilation operator context
 * @return the value returned by the merge task's {@code execute} call
 * @throws HiveException declared by the signature; presumably raised by the
 *         called helpers (none is thrown directly in this body)
 */
private int mergeFiles(Hive db, AlterTablePartMergeFilesDesc mergeFilesDesc, DriverContext driverContext) throws HiveException {
  // List-bucketing settings default to "off" when the descriptor carries no context.
  ListBucketingCtx bucketingCtx = mergeFilesDesc.getLbCtx();
  boolean storedAsSubDirs = false;
  int bucketingDepth = 0;
  if (bucketingCtx != null) {
    storedAsSubDirs = bucketingCtx.isSkewedStoredAsDir();
    bucketingDepth = bucketingCtx.calculateListBucketingLevel();
  }

  // merge work only needs input and output.
  MergeFileWork work = new MergeFileWork(
      mergeFilesDesc.getInputDir(),
      mergeFilesDesc.getOutputDir(),
      mergeFilesDesc.getInputFormatClass().getName(),
      mergeFilesDesc.getTableDesc());

  // Single alias: the string form of the input directory list.
  ArrayList<String> aliases = new ArrayList<>(1);
  aliases.add(mergeFilesDesc.getInputDir().toString());
  LinkedHashMap<Path, ArrayList<String>> aliasesByPath = new LinkedHashMap<>();
  aliasesByPath.put(mergeFilesDesc.getInputDir().get(0), aliases);
  work.setPathToAliases(aliasesByPath);

  work.setListBucketingCtx(mergeFilesDesc.getLbCtx());
  work.resolveConcatenateMerge(db.getConf());
  work.setMapperCannotSpanPartns(true);
  work.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass().getName());

  // Anything that is not RCFile is treated as ORC; the original author notes
  // that the semantic analyzer only lets RC/ORC through.
  final FileMergeDesc mergeDesc =
      mergeFilesDesc.getInputFormatClass().equals(RCFileInputFormat.class)
          ? new RCFileMergeDesc()
          : new OrcFileMergeDesc();
  mergeDesc.setDpCtx(null);
  mergeDesc.setHasDynamicPartitions(false);
  mergeDesc.setListBucketingAlterTableConcatenate(storedAsSubDirs);
  mergeDesc.setListBucketingDepth(bucketingDepth);
  mergeDesc.setOutputPath(mergeFilesDesc.getOutputDir());

  CompilationOpContext opContext = driverContext.getCtx().getOpContext();
  Operator<? extends OperatorDesc> mergeOperator = OperatorFactory.get(opContext, mergeDesc);
  LinkedHashMap<String, Operator<? extends OperatorDesc>> operatorsByAlias = new LinkedHashMap<>();
  operatorsByAlias.put(mergeFilesDesc.getInputDir().toString(), mergeOperator);
  work.setAliasToWork(operatorsByAlias);

  // The merge runs under a fresh driver context, not the caller's.
  DriverContext mergeDriverContext = new DriverContext();
  Task<?> mergeTask;
  if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
    TezWork tezWork = new TezWork(queryState.getQueryId(), conf);
    work.setName("File Merge");
    tezWork.add(work);
    TezTask tezTask = new TezTask();
    tezTask.setWork(tezWork);
    mergeTask = tezTask;
  } else {
    MergeFileTask fileTask = new MergeFileTask();
    fileTask.setWork(work);
    mergeTask = fileTask;
  }

  // initialize the task and execute
  mergeTask.initialize(queryState, getQueryPlan(), mergeDriverContext, opContext);
  subtask = mergeTask;
  int exitCode = mergeTask.execute(mergeDriverContext);
  if (subtask.getException() != null) {
    setException(subtask.getException());
  }
  return exitCode;
}
use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class OperatorHealthCheckerHook method run.
/**
 * Gathers the root operators of every root task in the query plan and hands
 * them to {@code walkTree}.
 *
 * For {@code MapredWork} the map and reduce sides are inspected (each only
 * when present); for {@code TezWork} every contained unit of work is
 * inspected. Tasks carrying any other kind of work contribute nothing.
 *
 * @param hookContext the hook context whose query plan is examined
 * @throws Exception declared by the hook interface
 */
@Override
public void run(HookContext hookContext) throws Exception {
  List<Node> operatorRoots = Lists.newArrayList();
  for (Task<?> rootTask : hookContext.getQueryPlan().getRootTasks()) {
    Object taskWork = rootTask.getWork();

    if (taskWork instanceof MapredWork) {
      MapredWork mrWork = (MapredWork) taskWork;
      MapWork mapSide = mrWork.getMapWork();
      if (mapSide != null) {
        operatorRoots.addAll(mapSide.getAllRootOperators());
      }
      ReduceWork reduceSide = mrWork.getReduceWork();
      if (reduceSide != null) {
        operatorRoots.addAll(reduceSide.getAllRootOperators());
      }
    }

    if (taskWork instanceof TezWork) {
      TezWork tezWork = (TezWork) taskWork;
      for (BaseWork unit : tezWork.getAllWorkUnsorted()) {
        operatorRoots.addAll(unit.getAllRootOperators());
      }
    }
  }
  walkTree(operatorRoots);
}
use of org.apache.hadoop.hive.ql.plan.TezWork in project hive by apache.
the class TezTask method collectCommitInformation.
/**
 * Records commit information for every unit of work whose job configuration
 * uses an output committer with a class name ending in
 * {@code HiveIcebergNoJobCommitter} and whose vertex has at least one data sink.
 *
 * For each such vertex a job id is derived from the vertex status id, the
 * target tables and Iceberg properties are read from the job configuration,
 * and one commit-info entry per target table is stored via
 * {@code SessionStateUtil.addCommitInfo}.
 *
 * @param work the Tez work whose units are examined
 * @throws IOException declared by the signature; presumably from the vertex status lookup
 * @throws TezException declared by the signature; presumably from the vertex status lookup
 */
private void collectCommitInformation(TezWork work) throws IOException, TezException {
  for (BaseWork unit : work.getAllWork()) {
    JobConf unitConf = workToConf.get(unit);
    Vertex unitVertex = workToVertex.get(unit);

    // Detect the Iceberg committer by class-name suffix; a missing conf or
    // missing committer simply means "not Iceberg".
    boolean icebergCommitter = false;
    if (unitConf != null) {
      Object committer = unitConf.getOutputCommitter();
      if (committer != null) {
        icebergCommitter = committer.getClass().getName().endsWith("HiveIcebergNoJobCommitter");
      }
    }

    // we should only consider jobs with Iceberg output committer and a data sink
    if (!icebergCommitter || unitVertex.getDataSinks().isEmpty()) {
      continue;
    }

    VertexStatus vertexStatus =
        dagClient.getVertexStatus(unitVertex.getName(), EnumSet.of(StatusGetOpts.GET_COUNTERS));

    // vertexStatus.getId() returns something like: vertex_1617722404520_0001_1_00
    // this should be transformed to a parsable JobID: job_16177224045200_0001
    String[] idParts = vertexStatus.getId().split("_");
    int vertexNumber = Integer.parseInt(idParts[idParts.length - 1]);
    String jobId = String.format(JOB_ID_TEMPLATE, idParts[1], vertexNumber, idParts[2]);

    List<String> targetTables = new ArrayList<>();
    Map<String, String> icebergProps = new HashMap<>();
    for (Map.Entry<String, String> confEntry : unitConf) {
      String key = confEntry.getKey();
      if (key.startsWith(ICEBERG_SERIALIZED_TABLE_PREFIX)) {
        // get all target tables this vertex wrote to
        targetTables.add(key.substring(ICEBERG_SERIALIZED_TABLE_PREFIX.length()));
      } else if (key.startsWith(ICEBERG_PROPERTY_PREFIX)) {
        // find iceberg props in jobConf as they can be needed, but not available, during job commit
        icebergProps.put(key, confEntry.getValue());
      }
    }

    // save information for each target table
    for (String table : targetTables) {
      SessionStateUtil.addCommitInfo(
          unitConf, table, jobId, vertexStatus.getProgress().getSucceededTaskCount(), icebergProps);
    }
  }
}
Aggregations