Example 21 with DAGStatus

use of org.apache.tez.dag.api.client.DAGStatus in project tez by apache.

the class TestMRRJobsDAGApi method testHistoryLogging.

// Submits a single-vertex, two-task sleep DAG in non-session mode and verifies
// that a non-empty simple history log file is written.
@Test(timeout = 60000)
public void testHistoryLogging() throws IOException, InterruptedException, TezException, ClassNotFoundException, YarnException {
    SleepProcessorConfig spConf = new SleepProcessorConfig(1);
    DAG dag = DAG.create("TezSleepProcessorHistoryLogging");
    Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 2, Resource.newInstance(1024, 1));
    dag.addVertex(vertex);
    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random.nextInt(100000))));
    remoteFs.mkdirs(remoteStagingDir);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());
    FileSystem localFs = FileSystem.getLocal(tezConf);
    Path historyLogDir = new Path(TEST_ROOT_DIR, "testHistoryLogging");
    localFs.mkdirs(historyLogDir);
    tezConf.set(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR, localFs.makeQualified(historyLogDir).toString());
    tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
    TezClient tezSession = TezClient.create("TezSleepProcessorHistoryLogging", tezConf);
    tezSession.start();
    DAGClient dagClient = tezSession.submitDAG(dag);
    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
        LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState());
        Thread.sleep(500L);
        dagStatus = dagClient.getDAGStatus(null);
    }
    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
    FileStatus historyLogFileStatus = null;
    for (FileStatus fileStatus : localFs.listStatus(historyLogDir)) {
        if (fileStatus.isDirectory()) {
            continue;
        }
        Path p = fileStatus.getPath();
        if (p.getName().startsWith(SimpleHistoryLoggingService.LOG_FILE_NAME_PREFIX)) {
            historyLogFileStatus = fileStatus;
            break;
        }
    }
    Assert.assertNotNull(historyLogFileStatus);
    Assert.assertTrue(historyLogFileStatus.getLen() > 0);
    tezSession.stop();
}
Also used : Path(org.apache.hadoop.fs.Path) Vertex(org.apache.tez.dag.api.Vertex) FileStatus(org.apache.hadoop.fs.FileStatus) SleepProcessorConfig(org.apache.tez.runtime.library.processor.SleepProcessor.SleepProcessorConfig) FileSystem(org.apache.hadoop.fs.FileSystem) SleepProcessor(org.apache.tez.runtime.library.processor.SleepProcessor) DAGClient(org.apache.tez.dag.api.client.DAGClient) DAG(org.apache.tez.dag.api.DAG) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) Test(org.junit.Test)
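
The loop above is the idiom most of these examples share: fetch a DAGStatus, sleep briefly, and re-fetch until isCompleted() returns true. Below is a minimal sketch of that pattern factored into a reusable helper; the class and method names are illustrative, not part of Tez.

import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;

public class DagStatusPoller {

    // Polls getDAGStatus(null) every 500 ms until the DAG reaches a terminal state,
    // then returns that state. Mirrors the loop in testHistoryLogging above.
    public static DAGStatus.State waitForCompletion(DAGClient dagClient) throws Exception {
        DAGStatus dagStatus = dagClient.getDAGStatus(null);
        while (!dagStatus.isCompleted()) {
            Thread.sleep(500L);
            dagStatus = dagClient.getDAGStatus(null);
        }
        return dagStatus.getState();
    }
}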

Example 22 with DAGStatus

use of org.apache.tez.dag.api.client.DAGStatus in project tez by apache.

the class TestMRRJobsDAGApi method testMRRSleepJobDagSubmitCore.

public State testMRRSleepJobDagSubmitCore(boolean dagViaRPC, boolean killDagWhileRunning, boolean closeSessionBeforeSubmit, TezClient reUseTezSession, boolean genSplitsInAM, Class<? extends InputInitializer> initializerClass, Map<String, LocalResource> additionalLocalResources) throws IOException, InterruptedException, TezException, ClassNotFoundException, YarnException {
    LOG.info("\n\n\nStarting testMRRSleepJobDagSubmit().");
    JobConf stage1Conf = new JobConf(mrrTezCluster.getConfig());
    JobConf stage2Conf = new JobConf(mrrTezCluster.getConfig());
    JobConf stage3Conf = new JobConf(mrrTezCluster.getConfig());
    stage1Conf.setLong(MRRSleepJob.MAP_SLEEP_TIME, 1);
    stage1Conf.setInt(MRRSleepJob.MAP_SLEEP_COUNT, 1);
    stage1Conf.setInt(MRJobConfig.NUM_MAPS, 1);
    stage1Conf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());
    stage1Conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
    stage1Conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
    stage1Conf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
    stage1Conf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    stage2Conf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, 1);
    stage2Conf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, 1);
    stage2Conf.setInt(MRJobConfig.NUM_REDUCES, 1);
    stage2Conf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
    stage2Conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
    stage2Conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
    stage2Conf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    stage3Conf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, 1);
    stage3Conf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, 1);
    stage3Conf.setInt(MRJobConfig.NUM_REDUCES, 1);
    stage3Conf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
    stage3Conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
    stage3Conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
    MRHelpers.translateMRConfToTez(stage1Conf);
    MRHelpers.translateMRConfToTez(stage2Conf);
    MRHelpers.translateMRConfToTez(stage3Conf);
    MRHelpers.configureMRApiUsage(stage1Conf);
    MRHelpers.configureMRApiUsage(stage2Conf);
    MRHelpers.configureMRApiUsage(stage3Conf);
    Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(new Random().nextInt(100000))));
    TezClientUtils.ensureStagingDirExists(conf, remoteStagingDir);
    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
    UserPayload stage2Payload = TezUtils.createUserPayloadFromConf(stage2Conf);
    UserPayload stage3Payload = TezUtils.createUserPayloadFromConf(stage3Conf);
    DAG dag = DAG.create("testMRRSleepJobDagSubmit-" + random.nextInt(1000));
    Class<? extends InputInitializer> inputInitializerClazz = genSplitsInAM ? (initializerClass == null ? MRInputAMSplitGenerator.class : initializerClass) : null;
    LOG.info("Using initializer class: " + initializerClass);
    DataSourceDescriptor dsd;
    if (!genSplitsInAM) {
        dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, remoteStagingDir, true);
    } else {
        if (initializerClass == null) {
            dsd = MRInputLegacy.createConfigBuilder(stage1Conf, SleepInputFormat.class).build();
        } else {
            InputInitializerDescriptor iid = InputInitializerDescriptor.create(inputInitializerClazz.getName());
            dsd = MRInputLegacy.createConfigBuilder(stage1Conf, SleepInputFormat.class).setCustomInitializerDescriptor(iid).build();
        }
    }
    Vertex stage1Vertex = Vertex.create("map", ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(stage1Payload), dsd.getNumberOfShards(), Resource.newInstance(256, 1));
    stage1Vertex.addDataSource("MRInput", dsd);
    Vertex stage2Vertex = Vertex.create("ireduce", ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(stage2Payload), 1, Resource.newInstance(256, 1));
    Vertex stage3Vertex = Vertex.create("reduce", ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(stage3Payload), 1, Resource.newInstance(256, 1));
    stage3Conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT, true);
    DataSinkDescriptor dataSinkDescriptor = MROutputLegacy.createConfigBuilder(stage3Conf, NullOutputFormat.class).build();
    Assert.assertFalse(dataSinkDescriptor.getOutputDescriptor().getHistoryText().isEmpty());
    stage3Vertex.addDataSink("MROutput", dataSinkDescriptor);
    // TODO env, resources
    dag.addVertex(stage1Vertex);
    dag.addVertex(stage2Vertex);
    dag.addVertex(stage3Vertex);
    Edge edge1 = Edge.create(stage1Vertex, stage2Vertex, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(stage2Payload), InputDescriptor.create(OrderedGroupedInputLegacy.class.getName()).setUserPayload(stage2Payload)));
    Edge edge2 = Edge.create(stage2Vertex, stage3Vertex, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(stage3Payload), InputDescriptor.create(OrderedGroupedInputLegacy.class.getName()).setUserPayload(stage3Payload)));
    dag.addEdge(edge1);
    dag.addEdge(edge2);
    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());
    DAGClient dagClient = null;
    boolean reuseSession = reUseTezSession != null;
    TezClient tezSession = null;
    if (!dagViaRPC) {
        Preconditions.checkArgument(reuseSession == false);
    }
    if (!reuseSession) {
        TezConfiguration tempTezconf = new TezConfiguration(tezConf);
        if (!dagViaRPC) {
            tempTezconf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
        } else {
            tempTezconf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);
        }
        tezSession = TezClient.create("testsession", tempTezconf);
        tezSession.start();
    } else {
        tezSession = reUseTezSession;
    }
    if (!dagViaRPC) {
        // TODO Use utility method post TEZ-205 to figure out AM arguments etc.
        dagClient = tezSession.submitDAG(dag);
    }
    if (dagViaRPC && closeSessionBeforeSubmit) {
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(mrrTezCluster.getConfig());
        yarnClient.start();
        boolean sentKillSession = false;
        while (true) {
            Thread.sleep(500L);
            ApplicationReport appReport = yarnClient.getApplicationReport(tezSession.getAppMasterApplicationId());
            if (appReport == null) {
                continue;
            }
            YarnApplicationState appState = appReport.getYarnApplicationState();
            if (!sentKillSession) {
                if (appState == YarnApplicationState.RUNNING) {
                    tezSession.stop();
                    sentKillSession = true;
                }
            } else {
                if (appState == YarnApplicationState.FINISHED || appState == YarnApplicationState.KILLED || appState == YarnApplicationState.FAILED) {
                    LOG.info("Application completed after sending session shutdown" + ", yarnApplicationState=" + appState + ", finalAppStatus=" + appReport.getFinalApplicationStatus());
                    Assert.assertEquals(YarnApplicationState.FINISHED, appState);
                    Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, appReport.getFinalApplicationStatus());
                    break;
                }
            }
        }
        yarnClient.stop();
        return null;
    }
    if (dagViaRPC) {
        LOG.info("Submitting dag to tez session with appId=" + tezSession.getAppMasterApplicationId() + " and Dag Name=" + dag.getName());
        if (additionalLocalResources != null) {
            tezSession.addAppMasterLocalFiles(additionalLocalResources);
        }
        dagClient = tezSession.submitDAG(dag);
        Assert.assertEquals(TezAppMasterStatus.RUNNING, tezSession.getAppMasterStatus());
    }
    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
        LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: " + dagStatus.getState());
        Thread.sleep(500L);
        if (killDagWhileRunning && dagStatus.getState() == DAGStatus.State.RUNNING) {
            LOG.info("Killing running dag/session");
            if (dagViaRPC) {
                tezSession.stop();
            } else {
                dagClient.tryKillDAG();
            }
        }
        dagStatus = dagClient.getDAGStatus(null);
    }
    if (!reuseSession) {
        tezSession.stop();
    }
    return dagStatus.getState();
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) YarnApplicationState(org.apache.hadoop.yarn.api.records.YarnApplicationState) TezClient(org.apache.tez.client.TezClient) Random(java.util.Random) SleepInputFormat(org.apache.tez.mapreduce.examples.MRRSleepJob.SleepInputFormat) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) JobConf(org.apache.hadoop.mapred.JobConf) IntWritable(org.apache.hadoop.io.IntWritable) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) MapProcessor(org.apache.tez.mapreduce.processor.map.MapProcessor) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) ISleepReducer(org.apache.tez.mapreduce.examples.MRRSleepJob.ISleepReducer) Path(org.apache.hadoop.fs.Path) UserPayload(org.apache.tez.dag.api.UserPayload) DAG(org.apache.tez.dag.api.DAG) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ReduceProcessor(org.apache.tez.mapreduce.processor.reduce.ReduceProcessor) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) SleepReducer(org.apache.tez.mapreduce.examples.MRRSleepJob.SleepReducer) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) DAGClient(org.apache.tez.dag.api.client.DAGClient) SleepMapper(org.apache.tez.mapreduce.examples.MRRSleepJob.SleepMapper) MRRSleepJobPartitioner(org.apache.tez.mapreduce.examples.MRRSleepJob.MRRSleepJobPartitioner) Edge(org.apache.tez.dag.api.Edge) NullOutputFormat(org.apache.hadoop.mapreduce.lib.output.NullOutputFormat)
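
Both edges in this example use the same SCATTER_GATHER / PERSISTED / SEQUENTIAL EdgeProperty and differ only in the downstream stage's payload. A small helper could factor out that repetition; the sketch below is hypothetical (the EdgeHelper class is not part of Tez) and assumes the same ordered-partitioned output and ordered-grouped input classes used above.

import org.apache.tez.dag.api.Edge;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.EdgeProperty.DataMovementType;
import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.runtime.library.input.OrderedGroupedInputLegacy;
import org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput;

public class EdgeHelper {

    // Builds the SCATTER_GATHER edge used twice above, configuring both the
    // upstream output and the downstream input with the downstream stage's payload.
    public static Edge scatterGatherEdge(Vertex from, Vertex to, UserPayload downstreamPayload) {
        return Edge.create(from, to,
            EdgeProperty.create(
                DataMovementType.SCATTER_GATHER,
                DataSourceType.PERSISTED,
                SchedulingType.SEQUENTIAL,
                OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
                    .setUserPayload(downstreamPayload),
                InputDescriptor.create(OrderedGroupedInputLegacy.class.getName())
                    .setUserPayload(downstreamPayload)));
    }
}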

Example 23 with DAGStatus

use of org.apache.tez.dag.api.client.DAGStatus in project tez by apache.

the class FaultToleranceTestRunner method run.

boolean run(Configuration conf, String className, String confFilePath) throws Exception {
    this.conf = conf;
    setup();
    try {
        tezSession.waitTillReady();
        DAG dag = getDAG(className, confFilePath);
        DAGClient dagClient = tezSession.submitDAG(dag);
        DAGStatus dagStatus = dagClient.getDAGStatus(null);
        while (!dagStatus.isCompleted()) {
            System.out.println("Waiting for dag to complete. Sleeping for 500ms." + " DAG name: " + dag.getName() + " DAG appContext: " + dagClient.getExecutionContext() + " Current state: " + dagStatus.getState());
            Thread.sleep(500);
            dagStatus = dagClient.getDAGStatus(null);
        }
        if (dagStatus.getState() == DAGStatus.State.SUCCEEDED) {
            return true;
        }
    } finally {
        tearDown();
    }
    return false;
}
Also used : DAGClient(org.apache.tez.dag.api.client.DAGClient) DAG(org.apache.tez.dag.api.DAG) DAGStatus(org.apache.tez.dag.api.client.DAGStatus)

Example 24 with DAGStatus

use of org.apache.tez.dag.api.client.DAGStatus in project tez by apache.

the class UnionExample method run.

public boolean run(String inputPath, String outputPath, Configuration conf) throws Exception {
    System.out.println("Running UnionExample");
    // conf and UGI
    TezConfiguration tezConf;
    if (conf != null) {
        tezConf = new TezConfiguration(conf);
    } else {
        tezConf = new TezConfiguration();
    }
    UserGroupInformation.setConfiguration(tezConf);
    String user = UserGroupInformation.getCurrentUser().getShortUserName();
    // staging dir
    FileSystem fs = FileSystem.get(tezConf);
    String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging" + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = fs.makeQualified(stagingDir);
    // No need to add jar containing this class as assumed to be part of
    // the tez jars.
    // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir
    // is the same filesystem as the one used for Input/Output.
    TezClient tezSession = TezClient.create("UnionExampleSession", tezConf);
    tezSession.start();
    DAGClient dagClient = null;
    try {
        if (fs.exists(new Path(outputPath))) {
            throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
        }
        Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
        DAG dag = createDAG(fs, tezConf, localResources, stagingDir, inputPath, outputPath);
        tezSession.waitTillReady();
        dagClient = tezSession.submitDAG(dag);
        // monitoring
        DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(EnumSet.of(StatusGetOpts.GET_COUNTERS));
        if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
            System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
            return false;
        }
        return true;
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileAlreadyExistsException(org.apache.hadoop.mapred.FileAlreadyExistsException) FileSystem(org.apache.hadoop.fs.FileSystem) DAGClient(org.apache.tez.dag.api.client.DAGClient) DAG(org.apache.tez.dag.api.DAG) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) TreeMap(java.util.TreeMap) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource)
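
Unlike the earlier tests, UnionExample delegates the polling to DAGClient.waitForCompletionWithStatusUpdates, which blocks and reports progress until the DAG finishes. A hedged sketch of that style follows; the DagSubmitHelper class and its method name are illustrative only.

import java.util.EnumSet;

import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.dag.api.client.StatusGetOpts;

public class DagSubmitHelper {

    // Submits a DAG on an already-started TezClient, blocks until it completes,
    // and reports counters on success or diagnostics on failure.
    public static boolean submitAndWait(TezClient tezSession, DAG dag) throws Exception {
        // Wait until the AM is ready to accept DAGs, as UnionExample does.
        tezSession.waitTillReady();
        DAGClient dagClient = tezSession.submitDAG(dag);
        DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(
                EnumSet.of(StatusGetOpts.GET_COUNTERS));
        if (dagStatus.getState() == DAGStatus.State.SUCCEEDED) {
            System.out.println("DAG counters: " + dagStatus.getDAGCounters());
            return true;
        }
        System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
        return false;
    }
}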

Example 25 with DAGStatus

use of org.apache.tez.dag.api.client.DAGStatus in project hive by apache.

the class TezJobMonitor method monitorExecution.

public int monitorExecution() {
    boolean done = false;
    boolean success = false;
    int failedCounter = 0;
    final StopWatch failureTimer = new StopWatch();
    int rc = 0;
    DAGStatus status = null;
    Map<String, Progress> vertexProgressMap = null;
    long monitorStartTime = System.currentTimeMillis();
    synchronized (shutdownList) {
        shutdownList.add(dagClient);
    }
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
    DAGStatus.State lastState = null;
    boolean running = false;
    long checkInterval = HiveConf.getTimeVar(hiveConf, HiveConf.ConfVars.TEZ_DAG_STATUS_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
    WmContext wmContext = null;
    while (true) {
        try {
            if (context != null) {
                context.checkHeartbeaterLockException();
            }
            wmContext = context.getWmContext();
            EnumSet<StatusGetOpts> opts = null;
            if (wmContext != null) {
                Set<String> desiredCounters = wmContext.getSubscribedCounters();
                if (desiredCounters != null && !desiredCounters.isEmpty()) {
                    opts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
                }
            }
            status = dagClient.getDAGStatus(opts, checkInterval);
            vertexProgressMap = status.getVertexProgress();
            List<String> vertexNames = vertexProgressMap.keySet().stream().map(k -> k.replaceAll(" ", "_")).collect(Collectors.toList());
            if (wmContext != null) {
                Set<String> desiredCounters = wmContext.getSubscribedCounters();
                TezCounters dagCounters = status.getDAGCounters();
                // if initial counters exists, merge it with dag counters to get aggregated view
                TezCounters mergedCounters = counters == null ? dagCounters : Utils.mergeTezCounters(dagCounters, counters);
                if (mergedCounters != null && desiredCounters != null && !desiredCounters.isEmpty()) {
                    Map<String, Long> currentCounters = getCounterValues(mergedCounters, vertexNames, vertexProgressMap, desiredCounters, done);
                    LOG.debug("Requested DAG status. checkInterval: {}. currentCounters: {}", checkInterval, currentCounters);
                    wmContext.setCurrentCounters(currentCounters);
                }
            }
            DAGStatus.State state = status.getState();
            // AM is responsive again (recovery?)
            failedCounter = 0;
            failureTimer.reset();
            if (state != lastState || state == RUNNING) {
                lastState = state;
                switch(state) {
                    case SUBMITTED:
                        console.printInfo("Status: Submitted");
                        break;
                    case INITING:
                        console.printInfo("Status: Initializing");
                        this.executionStartTime = System.currentTimeMillis();
                        break;
                    case RUNNING:
                        if (!running) {
                            perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
                            console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n");
                            this.executionStartTime = System.currentTimeMillis();
                            running = true;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        break;
                    case SUCCEEDED:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        success = true;
                        running = false;
                        done = true;
                        break;
                    case KILLED:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        console.printInfo("Status: Killed");
                        running = false;
                        done = true;
                        rc = 1;
                        break;
                    case FAILED:
                    case ERROR:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        console.printError("Status: Failed");
                        running = false;
                        done = true;
                        rc = 2;
                        break;
                }
            }
            if (wmContext != null && done) {
                wmContext.setQueryCompleted(true);
            }
        } catch (Exception e) {
            console.printInfo("Exception: " + e.getMessage());
            boolean isInterrupted = hasInterruptedException(e);
            if (failedCounter == 0) {
                failureTimer.reset();
                failureTimer.start();
            }
            if (isInterrupted || (++failedCounter >= MAX_RETRY_FAILURES && failureTimer.now(TimeUnit.MILLISECONDS) > MAX_RETRY_INTERVAL)) {
                try {
                    if (isInterrupted) {
                        console.printInfo("Killing DAG...");
                    } else {
                        console.printInfo(String.format("Killing DAG... after %d seconds", failureTimer.now(TimeUnit.SECONDS)));
                    }
                    dagClient.tryKillDAG();
                } catch (IOException | TezException tezException) {
                // best effort
                }
                console.printError("Execution has failed. stack trace: " + ExceptionUtils.getStackTrace(e));
                rc = 1;
                done = true;
            } else {
                console.printInfo("Retrying...");
            }
            if (wmContext != null && done) {
                wmContext.setQueryCompleted(true);
            }
        } finally {
            if (done) {
                if (wmContext != null && done) {
                    wmContext.setQueryCompleted(true);
                }
                if (rc != 0 && status != null) {
                    for (String diag : status.getDiagnostics()) {
                        console.printError(diag);
                        diagnostics.append(diag);
                    }
                }
                synchronized (shutdownList) {
                    shutdownList.remove(dagClient);
                }
                break;
            }
        }
    }
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    printSummary(success, vertexProgressMap);
    return rc;
}
Also used : DAGClient(org.apache.tez.dag.api.client.DAGClient) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) TezCounter(org.apache.tez.common.counters.TezCounter) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) InterruptedIOException(java.io.InterruptedIOException) TimeCounterLimit(org.apache.hadoop.hive.ql.wm.TimeCounterLimit) VertexCounterLimit(org.apache.hadoop.hive.ql.wm.VertexCounterLimit) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) ProgressMonitor(org.apache.hadoop.hive.common.log.ProgressMonitor) Map(java.util.Map) InPlaceUpdate(org.apache.hadoop.hive.common.log.InPlaceUpdate) Context(org.apache.hadoop.hive.ql.Context) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) CounterGroup(org.apache.tez.common.counters.CounterGroup) LinkedList(java.util.LinkedList) EnumSet(java.util.EnumSet) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) Progress(org.apache.tez.dag.api.client.Progress) Logger(org.slf4j.Logger) StringWriter(java.io.StringWriter) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Set(java.util.Set) StopWatch(org.apache.tez.util.StopWatch) IOException(java.io.IOException) DAG(org.apache.tez.dag.api.DAG) Collectors(java.util.stream.Collectors) SessionState(org.apache.hadoop.hive.ql.session.SessionState) RUNNING(org.apache.tez.dag.api.client.DAGStatus.State.RUNNING) ShutdownHookManager(org.apache.hive.common.util.ShutdownHookManager) LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper) TezException(org.apache.tez.dag.api.TezException) TimeUnit(java.util.concurrent.TimeUnit) TezCounters(org.apache.tez.common.counters.TezCounters) List(java.util.List) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) Utils(org.apache.hadoop.hive.ql.exec.tez.Utils) Preconditions(com.google.common.base.Preconditions) TezSessionPoolManager(org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager) ExceptionUtils(org.apache.commons.lang3.exception.ExceptionUtils)
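
TezJobMonitor relies on the two-argument getDAGStatus overload, which blocks for up to checkInterval milliseconds before returning the current status and can return counters in the same call. A minimal sketch of that timed poll is below; the TimedStatusPoll class and method names are illustrative only.

import java.util.EnumSet;

import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.dag.api.client.StatusGetOpts;

public class TimedStatusPoll {

    // Requests the DAG status together with its counters, blocking for up to
    // checkIntervalMs before returning, as TezJobMonitor does above.
    public static DAGStatus pollWithCounters(DAGClient dagClient, long checkIntervalMs) throws Exception {
        DAGStatus status = dagClient.getDAGStatus(EnumSet.of(StatusGetOpts.GET_COUNTERS), checkIntervalMs);
        TezCounters dagCounters = status.getDAGCounters();
        System.out.println("State: " + status.getState() + ", counters: " + dagCounters);
        return status;
    }
}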

Aggregations

DAGStatus (org.apache.tez.dag.api.client.DAGStatus)42 DAGClient (org.apache.tez.dag.api.client.DAGClient)37 DAG (org.apache.tez.dag.api.DAG)30 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)27 Test (org.junit.Test)20 TezClient (org.apache.tez.client.TezClient)18 Vertex (org.apache.tez.dag.api.Vertex)18 Path (org.apache.hadoop.fs.Path)17 SleepProcessor (org.apache.tez.runtime.library.processor.SleepProcessor)9 SleepProcessorConfig (org.apache.tez.runtime.library.processor.SleepProcessor.SleepProcessorConfig)9 FileSystem (org.apache.hadoop.fs.FileSystem)8 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)7 IOException (java.io.IOException)6 TezException (org.apache.tez.dag.api.TezException)6 StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts)5 MockContainerLauncher (org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher)5 TreeMap (java.util.TreeMap)4 Configuration (org.apache.hadoop.conf.Configuration)4 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)4 DAGImpl (org.apache.tez.dag.app.dag.impl.DAGImpl)4