Use of org.apache.tez.dag.api.client.DAGStatus in project tez by apache.
The class TestMRRJobsDAGApi, method testHistoryLogging.
// Submits a single-vertex sleep DAG in non-session mode, waits for it to complete, and verifies that a SimpleHistoryLoggingService log file was written.
@Test(timeout = 60000)
public void testHistoryLogging() throws IOException, InterruptedException, TezException, ClassNotFoundException, YarnException {
SleepProcessorConfig spConf = new SleepProcessorConfig(1);
DAG dag = DAG.create("TezSleepProcessorHistoryLogging");
Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 2, Resource.newInstance(1024, 1));
dag.addVertex(vertex);
TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random.nextInt(100000))));
remoteFs.mkdirs(remoteStagingDir);
tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());
FileSystem localFs = FileSystem.getLocal(tezConf);
Path historyLogDir = new Path(TEST_ROOT_DIR, "testHistoryLogging");
localFs.mkdirs(historyLogDir);
tezConf.set(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR, localFs.makeQualified(historyLogDir).toString());
tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
TezClient tezSession = TezClient.create("TezSleepProcessorHistoryLogging", tezConf);
tezSession.start();
DAGClient dagClient = tezSession.submitDAG(dag);
DAGStatus dagStatus = dagClient.getDAGStatus(null);
while (!dagStatus.isCompleted()) {
LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState());
Thread.sleep(500L);
dagStatus = dagClient.getDAGStatus(null);
}
assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
FileStatus historyLogFileStatus = null;
for (FileStatus fileStatus : localFs.listStatus(historyLogDir)) {
if (fileStatus.isDirectory()) {
continue;
}
Path p = fileStatus.getPath();
if (p.getName().startsWith(SimpleHistoryLoggingService.LOG_FILE_NAME_PREFIX)) {
historyLogFileStatus = fileStatus;
break;
}
}
Assert.assertNotNull(historyLogFileStatus);
Assert.assertTrue(historyLogFileStatus.getLen() > 0);
tezSession.stop();
}
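The status-polling loop above recurs in every snippet on this page. Below is a minimal sketch of the same pattern pulled into a reusable helper; the method name waitForDagCompletion is hypothetical and not part of the Tez API, only the DAGClient and DAGStatus calls are taken from the snippets, and imports are omitted as in the surrounding code.
// Hypothetical helper: polls getDAGStatus until the DAG reaches a terminal state, then returns that state.
private static DAGStatus.State waitForDagCompletion(DAGClient dagClient)
    throws IOException, TezException, InterruptedException {
  DAGStatus dagStatus = dagClient.getDAGStatus(null);
  while (!dagStatus.isCompleted()) {
    Thread.sleep(500L);
    dagStatus = dagClient.getDAGStatus(null);
  }
  return dagStatus.getState();
}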
Use of org.apache.tez.dag.api.client.DAGStatus in project tez by apache.
The class TestMRRJobsDAGApi, method testMRRSleepJobDagSubmitCore.
public State testMRRSleepJobDagSubmitCore(boolean dagViaRPC, boolean killDagWhileRunning, boolean closeSessionBeforeSubmit, TezClient reUseTezSession, boolean genSplitsInAM, Class<? extends InputInitializer> initializerClass, Map<String, LocalResource> additionalLocalResources) throws IOException, InterruptedException, TezException, ClassNotFoundException, YarnException {
LOG.info("\n\n\nStarting testMRRSleepJobDagSubmit().");
JobConf stage1Conf = new JobConf(mrrTezCluster.getConfig());
JobConf stage2Conf = new JobConf(mrrTezCluster.getConfig());
JobConf stage3Conf = new JobConf(mrrTezCluster.getConfig());
stage1Conf.setLong(MRRSleepJob.MAP_SLEEP_TIME, 1);
stage1Conf.setInt(MRRSleepJob.MAP_SLEEP_COUNT, 1);
stage1Conf.setInt(MRJobConfig.NUM_MAPS, 1);
stage1Conf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
stage1Conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
stage1Conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
stage1Conf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
stage1Conf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
stage2Conf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, 1);
stage2Conf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, 1);
stage2Conf.setInt(MRJobConfig.NUM_REDUCES, 1);
stage2Conf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
stage2Conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
stage2Conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
stage2Conf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
stage3Conf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, 1);
stage3Conf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, 1);
stage3Conf.setInt(MRJobConfig.NUM_REDUCES, 1);
stage3Conf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
stage3Conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
stage3Conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
MRHelpers.translateMRConfToTez(stage1Conf);
MRHelpers.translateMRConfToTez(stage2Conf);
MRHelpers.translateMRConfToTez(stage3Conf);
MRHelpers.configureMRApiUsage(stage1Conf);
MRHelpers.configureMRApiUsage(stage2Conf);
MRHelpers.configureMRApiUsage(stage3Conf);
Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(new Random().nextInt(100000))));
TezClientUtils.ensureStagingDirExists(conf, remoteStagingDir);
UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
UserPayload stage2Payload = TezUtils.createUserPayloadFromConf(stage2Conf);
UserPayload stage3Payload = TezUtils.createUserPayloadFromConf(stage3Conf);
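// Assemble the DAG: pick the split-generation strategy, then create the map, intermediate-reduce, and final-reduce vertices.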
DAG dag = DAG.create("testMRRSleepJobDagSubmit-" + random.nextInt(1000));
Class<? extends InputInitializer> inputInitializerClazz = genSplitsInAM ? (initializerClass == null ? MRInputAMSplitGenerator.class : initializerClass) : null;
LOG.info("Using initializer class: " + initializerClass);
DataSourceDescriptor dsd;
if (!genSplitsInAM) {
dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, remoteStagingDir, true);
} else {
if (initializerClass == null) {
dsd = MRInputLegacy.createConfigBuilder(stage1Conf, SleepInputFormat.class).build();
} else {
InputInitializerDescriptor iid = InputInitializerDescriptor.create(inputInitializerClazz.getName());
dsd = MRInputLegacy.createConfigBuilder(stage1Conf, SleepInputFormat.class).setCustomInitializerDescriptor(iid).build();
}
}
Vertex stage1Vertex = Vertex.create("map", ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(stage1Payload), dsd.getNumberOfShards(), Resource.newInstance(256, 1));
stage1Vertex.addDataSource("MRInput", dsd);
Vertex stage2Vertex = Vertex.create("ireduce", ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(stage2Payload), 1, Resource.newInstance(256, 1));
Vertex stage3Vertex = Vertex.create("reduce", ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(stage3Payload), 1, Resource.newInstance(256, 1));
stage3Conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT, true);
DataSinkDescriptor dataSinkDescriptor = MROutputLegacy.createConfigBuilder(stage3Conf, NullOutputFormat.class).build();
Assert.assertFalse(dataSinkDescriptor.getOutputDescriptor().getHistoryText().isEmpty());
stage3Vertex.addDataSink("MROutput", dataSinkDescriptor);
// TODO env, resources
dag.addVertex(stage1Vertex);
dag.addVertex(stage2Vertex);
dag.addVertex(stage3Vertex);
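// Connect the vertices with scatter-gather edges that use the ordered partitioned shuffle output and the legacy ordered grouped input.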
Edge edge1 = Edge.create(stage1Vertex, stage2Vertex, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(stage2Payload), InputDescriptor.create(OrderedGroupedInputLegacy.class.getName()).setUserPayload(stage2Payload)));
Edge edge2 = Edge.create(stage2Vertex, stage3Vertex, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(stage3Payload), InputDescriptor.create(OrderedGroupedInputLegacy.class.getName()).setUserPayload(stage3Payload)));
dag.addEdge(edge1);
dag.addEdge(edge2);
TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());
DAGClient dagClient = null;
boolean reuseSession = reUseTezSession != null;
TezClient tezSession = null;
if (!dagViaRPC) {
Preconditions.checkArgument(!reuseSession);
}
if (!reuseSession) {
TezConfiguration tempTezconf = new TezConfiguration(tezConf);
if (!dagViaRPC) {
tempTezconf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
} else {
tempTezconf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);
}
tezSession = TezClient.create("testsession", tempTezconf);
tezSession.start();
} else {
tezSession = reUseTezSession;
}
if (!dagViaRPC) {
// TODO Use utility method post TEZ-205 to figure out AM arguments etc.
dagClient = tezSession.submitDAG(dag);
}
if (dagViaRPC && closeSessionBeforeSubmit) {
YarnClient yarnClient = YarnClient.createYarnClient();
yarnClient.init(mrrTezCluster.getConfig());
yarnClient.start();
boolean sentKillSession = false;
while (true) {
Thread.sleep(500L);
ApplicationReport appReport = yarnClient.getApplicationReport(tezSession.getAppMasterApplicationId());
if (appReport == null) {
continue;
}
YarnApplicationState appState = appReport.getYarnApplicationState();
if (!sentKillSession) {
if (appState == YarnApplicationState.RUNNING) {
tezSession.stop();
sentKillSession = true;
}
} else {
if (appState == YarnApplicationState.FINISHED || appState == YarnApplicationState.KILLED || appState == YarnApplicationState.FAILED) {
LOG.info("Application completed after sending session shutdown" + ", yarnApplicationState=" + appState + ", finalAppStatus=" + appReport.getFinalApplicationStatus());
Assert.assertEquals(YarnApplicationState.FINISHED, appState);
Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, appReport.getFinalApplicationStatus());
break;
}
}
}
yarnClient.stop();
return null;
}
if (dagViaRPC) {
LOG.info("Submitting dag to tez session with appId=" + tezSession.getAppMasterApplicationId() + " and Dag Name=" + dag.getName());
if (additionalLocalResources != null) {
tezSession.addAppMasterLocalFiles(additionalLocalResources);
}
dagClient = tezSession.submitDAG(dag);
Assert.assertEquals(TezAppMasterStatus.RUNNING, tezSession.getAppMasterStatus());
}
DAGStatus dagStatus = dagClient.getDAGStatus(null);
while (!dagStatus.isCompleted()) {
LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState());
Thread.sleep(500L);
if (killDagWhileRunning && dagStatus.getState() == DAGStatus.State.RUNNING) {
LOG.info("Killing running dag/session");
if (dagViaRPC) {
tezSession.stop();
} else {
dagClient.tryKillDAG();
}
}
dagStatus = dagClient.getDAGStatus(null);
}
if (!reuseSession) {
tezSession.stop();
}
return dagStatus.getState();
}
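Where a caller does not need to kill the DAG mid-run, the manual polling loop above can be replaced with the blocking DAGClient.waitForCompletion() call. A minimal sketch, reusing the tezSession, dag, and LOG names from the snippet:
DAGClient dagClient = tezSession.submitDAG(dag);
// Blocks until the DAG reaches a terminal state (SUCCEEDED, KILLED, FAILED, or ERROR).
DAGStatus finalStatus = dagClient.waitForCompletion();
if (finalStatus.getState() != DAGStatus.State.SUCCEEDED) {
  LOG.error("DAG did not succeed, diagnostics: " + finalStatus.getDiagnostics());
}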
Use of org.apache.tez.dag.api.client.DAGStatus in project tez by apache.
The class FaultToleranceTestRunner, method run.
boolean run(Configuration conf, String className, String confFilePath) throws Exception {
this.conf = conf;
setup();
try {
tezSession.waitTillReady();
DAG dag = getDAG(className, confFilePath);
DAGClient dagClient = tezSession.submitDAG(dag);
DAGStatus dagStatus = dagClient.getDAGStatus(null);
while (!dagStatus.isCompleted()) {
System.out.println("Waiting for dag to complete. Sleeping for 500ms." + " DAG name: " + dag.getName() + " DAG appContext: " + dagClient.getExecutionContext() + " Current state: " + dagStatus.getState());
Thread.sleep(500);
dagStatus = dagClient.getDAGStatus(null);
}
if (dagStatus.getState() == DAGStatus.State.SUCCEEDED) {
return true;
}
} finally {
tearDown();
}
return false;
}
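When the DAG ends in any state other than SUCCEEDED, the diagnostics carried by the final DAGStatus usually say why. A small sketch of surfacing them before run returns false, using only the dag and dagStatus variables already in scope:
if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
  System.out.println("DAG " + dag.getName() + " finished in state " + dagStatus.getState()
      + ", diagnostics: " + dagStatus.getDiagnostics());
}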
Use of org.apache.tez.dag.api.client.DAGStatus in project tez by apache.
The class UnionExample, method run.
public boolean run(String inputPath, String outputPath, Configuration conf) throws Exception {
System.out.println("Running UnionExample");
// conf and UGI
TezConfiguration tezConf;
if (conf != null) {
tezConf = new TezConfiguration(conf);
} else {
tezConf = new TezConfiguration();
}
UserGroupInformation.setConfiguration(tezConf);
String user = UserGroupInformation.getCurrentUser().getShortUserName();
// staging dir
FileSystem fs = FileSystem.get(tezConf);
String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging" + Path.SEPARATOR + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
Path stagingDir = new Path(stagingDirStr);
tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
stagingDir = fs.makeQualified(stagingDir);
// No need to add jar containing this class as assumed to be part of
// the tez jars.
// TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir
// is the same filesystem as the one used for Input/Output.
TezClient tezSession = TezClient.create("UnionExampleSession", tezConf);
tezSession.start();
DAGClient dagClient = null;
try {
if (fs.exists(new Path(outputPath))) {
throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
}
Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
DAG dag = createDAG(fs, tezConf, localResources, stagingDir, inputPath, outputPath);
tezSession.waitTillReady();
dagClient = tezSession.submitDAG(dag);
// monitoring
DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(EnumSet.of(StatusGetOpts.GET_COUNTERS));
if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
return false;
}
return true;
} finally {
fs.delete(stagingDir, true);
tezSession.stop();
}
}
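Because waitForCompletionWithStatusUpdates is called with StatusGetOpts.GET_COUNTERS, the returned DAGStatus also carries the aggregated DAG counters. A sketch of logging them on success, purely illustrative since the example itself checks only the final state:
TezCounters dagCounters = dagStatus.getDAGCounters();
if (dagCounters != null) {
  System.out.println("DAG counters: " + dagCounters);
}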
Use of org.apache.tez.dag.api.client.DAGStatus in project hive by apache.
The class TezJobMonitor, method monitorExecution.
public int monitorExecution() {
boolean done = false;
boolean success = false;
int failedCounter = 0;
final StopWatch failureTimer = new StopWatch();
int rc = 0;
DAGStatus status = null;
Map<String, Progress> vertexProgressMap = null;
long monitorStartTime = System.currentTimeMillis();
synchronized (shutdownList) {
shutdownList.add(dagClient);
}
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
DAGStatus.State lastState = null;
boolean running = false;
long checkInterval = HiveConf.getTimeVar(hiveConf, HiveConf.ConfVars.TEZ_DAG_STATUS_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
WmContext wmContext = null;
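// Poll the DAG status until a terminal state is reached, pushing progress and any subscribed counters to the workload-management context along the way.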
while (true) {
try {
if (context != null) {
context.checkHeartbeaterLockException();
}
wmContext = context.getWmContext();
EnumSet<StatusGetOpts> opts = null;
if (wmContext != null) {
Set<String> desiredCounters = wmContext.getSubscribedCounters();
if (desiredCounters != null && !desiredCounters.isEmpty()) {
opts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
}
}
status = dagClient.getDAGStatus(opts, checkInterval);
vertexProgressMap = status.getVertexProgress();
List<String> vertexNames = vertexProgressMap.keySet().stream().map(k -> k.replaceAll(" ", "_")).collect(Collectors.toList());
if (wmContext != null) {
Set<String> desiredCounters = wmContext.getSubscribedCounters();
TezCounters dagCounters = status.getDAGCounters();
// if initial counters exists, merge it with dag counters to get aggregated view
TezCounters mergedCounters = counters == null ? dagCounters : Utils.mergeTezCounters(dagCounters, counters);
if (mergedCounters != null && desiredCounters != null && !desiredCounters.isEmpty()) {
Map<String, Long> currentCounters = getCounterValues(mergedCounters, vertexNames, vertexProgressMap, desiredCounters, done);
LOG.debug("Requested DAG status. checkInterval: {}. currentCounters: {}", checkInterval, currentCounters);
wmContext.setCurrentCounters(currentCounters);
}
}
DAGStatus.State state = status.getState();
// AM is responsive again (recovery?)
failedCounter = 0;
failureTimer.reset();
if (state != lastState || state == DAGStatus.State.RUNNING) {
lastState = state;
switch(state) {
case SUBMITTED:
console.printInfo("Status: Submitted");
break;
case INITING:
console.printInfo("Status: Initializing");
this.executionStartTime = System.currentTimeMillis();
break;
case RUNNING:
if (!running) {
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n");
this.executionStartTime = System.currentTimeMillis();
running = true;
}
updateFunction.update(status, vertexProgressMap);
break;
case SUCCEEDED:
if (!running) {
this.executionStartTime = monitorStartTime;
}
updateFunction.update(status, vertexProgressMap);
success = true;
running = false;
done = true;
break;
case KILLED:
if (!running) {
this.executionStartTime = monitorStartTime;
}
updateFunction.update(status, vertexProgressMap);
console.printInfo("Status: Killed");
running = false;
done = true;
rc = 1;
break;
case FAILED:
case ERROR:
if (!running) {
this.executionStartTime = monitorStartTime;
}
updateFunction.update(status, vertexProgressMap);
console.printError("Status: Failed");
running = false;
done = true;
rc = 2;
break;
}
}
if (wmContext != null && done) {
wmContext.setQueryCompleted(true);
}
} catch (Exception e) {
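// The status call failed: keep retrying for a bounded time unless interrupted, then kill the DAG and report the failure.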
console.printInfo("Exception: " + e.getMessage());
boolean isInterrupted = hasInterruptedException(e);
if (failedCounter == 0) {
failureTimer.reset();
failureTimer.start();
}
if (isInterrupted || (++failedCounter >= MAX_RETRY_FAILURES && failureTimer.now(TimeUnit.MILLISECONDS) > MAX_RETRY_INTERVAL)) {
try {
if (isInterrupted) {
console.printInfo("Killing DAG...");
} else {
console.printInfo(String.format("Killing DAG... after %d seconds", failureTimer.now(TimeUnit.SECONDS)));
}
dagClient.tryKillDAG();
} catch (IOException | TezException tezException) {
// best effort
}
console.printError("Execution has failed. stack trace: " + ExceptionUtils.getStackTrace(e));
rc = 1;
done = true;
} else {
console.printInfo("Retrying...");
}
if (wmContext != null && done) {
wmContext.setQueryCompleted(true);
}
} finally {
if (done) {
if (wmContext != null && done) {
wmContext.setQueryCompleted(true);
}
if (rc != 0 && status != null) {
for (String diag : status.getDiagnostics()) {
console.printError(diag);
diagnostics.append(diag);
}
}
synchronized (shutdownList) {
shutdownList.remove(dagClient);
}
break;
}
}
}
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
printSummary(success, vertexProgressMap);
return rc;
}
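The monitor is built on the two-argument DAGClient.getDAGStatus(Set<StatusGetOpts>, long), which, unlike the zero-argument polling in the earlier snippets, waits up to the supplied interval (milliseconds here) for the DAG to reach a final state before returning its current status, so the loop above needs no explicit sleep. A minimal sketch of that call in isolation, with illustrative variable names:
EnumSet<StatusGetOpts> opts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
// Long-polls the AM for up to checkInterval milliseconds, then returns the current DAG status.
DAGStatus status = dagClient.getDAGStatus(opts, checkInterval);
if (status.getState() == DAGStatus.State.FAILED || status.getState() == DAGStatus.State.ERROR) {
  for (String diag : status.getDiagnostics()) {
    console.printError(diag);
  }
}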