Search in sources:

Example 1 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project tez by apache.

the class TestOrderedWordCount method run.

/**
 * Runs one OrderedWordCount DAG for every (input path, output path) pair given on the
 * command line, all through a single {@link TezClient}.
 *
 * <p>Behaviour is driven by configuration keys read below (USE_TEZ_SESSION,
 * INTER_JOB_SLEEP_INTERVAL, RETAIN_STAGING_DIR, USE_MR_CONFIGS, IREDUCE_NUM_TASKS,
 * PRE_WARM_SESSION, PRE_WARM_NUM_CONTAINERS, ...). A fresh, timestamped staging directory is
 * created for the run and removed afterwards unless RETAIN_STAGING_DIR is set.
 *
 * @param args generic Hadoop options followed by alternating input and output paths
 * @return 0 if the last DAG that ran SUCCEEDED, 1 if it ended in any other state, 2 on a
 *         usage error, -1 if polling the DAG status failed with a {@link TezException}
 * @throws Exception if submitting or running a DAG fails; the error is logged and rethrown
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    boolean generateSplitsInClient;
    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }
    boolean useTezSession = conf.getBoolean("USE_TEZ_SESSION", true);
    // Multiply as long so that a large interval (in seconds) cannot overflow int.
    long interJobSleepTimeout = conf.getInt("INTER_JOB_SLEEP_INTERVAL", 0) * 1000L;
    boolean retainStagingDir = conf.getBoolean("RETAIN_STAGING_DIR", false);
    boolean useMRSettings = conf.getBoolean("USE_MR_CONFIGS", true);
    // TODO needs to use auto reduce parallelism
    int intermediateNumReduceTasks = conf.getInt("IREDUCE_NUM_TASKS", 2);
    int maxDataLengthThroughIPC = conf.getInt(MAX_IPC_DATA_LENGTH, -1);
    int exceedDataLimit = conf.getInt(EXCEED_IPC_DATA_LIMIT, 3);
    if (maxDataLengthThroughIPC > 0) {
        // The configured value is in MB; the IPC setting is in bytes.
        conf.setInt(CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, maxDataLengthThroughIPC * 1024 * 1024);
    }
    // At least one in/out pair is required: with none, no DAG would run and there would be
    // no final status to report. Non-session mode accepts exactly one pair.
    if (otherArgs.length == 0 || ((otherArgs.length % 2) != 0) || (!useTezSession && otherArgs.length != 2)) {
        printUsage();
        return 2;
    }
    List<String> inputPaths = new ArrayList<String>();
    List<String> outputPaths = new ArrayList<String>();
    TezConfiguration tezConf = new TezConfiguration(conf);
    // Arguments alternate: even index = input path, odd index = matching output path.
    for (int i = 0; i < otherArgs.length; i += 2) {
        FileSystem inputPathFs = new Path(otherArgs[i]).getFileSystem(tezConf);
        inputPaths.add(inputPathFs.makeQualified(new Path(otherArgs[i])).toString());
        FileSystem outputPathFs = new Path(otherArgs[i + 1]).getFileSystem(tezConf);
        outputPaths.add(outputPathFs.makeQualified(new Path(otherArgs[i + 1])).toString());
    }
    UserGroupInformation.setConfiguration(conf);
    HadoopShim hadoopShim = new HadoopShimsLoader(tezConf).getHadoopShim();
    TestOrderedWordCount instance = new TestOrderedWordCount();
    FileSystem fs = FileSystem.get(conf);
    // Use a per-run staging directory, made unique by the current timestamp.
    String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    FileSystem pathFs = stagingDir.getFileSystem(tezConf);
    pathFs.mkdirs(new Path(stagingDirStr));
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = pathFs.makeQualified(new Path(stagingDirStr));
    TokenCache.obtainTokensForNamenodes(instance.credentials, new Path[] { stagingDir }, conf);
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
    if (useTezSession) {
        LOG.info("Creating Tez Session");
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);
    } else {
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
    }
    TezClient tezSession = TezClient.create("OrderedWordCountSession", tezConf, null, instance.credentials);
    tezSession.start();
    if (tezSession.getAppMasterApplicationId() != null) {
        TezUtilsInternal.setHadoopCallerContext(hadoopShim, tezSession.getAppMasterApplicationId());
    }
    DAGStatus dagStatus = null;
    DAGClient dagClient = null;
    String[] vNames = { "initialmap", "intermediate_reducer", "finalreduce" };
    Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
    try {
        for (int dagIndex = 1; dagIndex <= inputPaths.size(); ++dagIndex) {
            if (dagIndex != 1 && interJobSleepTimeout > 0) {
                try {
                    LOG.info("Sleeping between jobs, sleepInterval=" + (interJobSleepTimeout / 1000));
                    Thread.sleep(interJobSleepTimeout);
                } catch (InterruptedException e) {
                    LOG.info("Main thread interrupted. Breaking out of job loop");
                    break;
                }
            }
            String inputPath = inputPaths.get(dagIndex - 1);
            String outputPath = outputPaths.get(dagIndex - 1);
            // Resolve the file system from the (already qualified) output path itself; the
            // default file system may not be the one the output path lives on.
            Path outputDir = new Path(outputPath);
            if (outputDir.getFileSystem(tezConf).exists(outputDir)) {
                throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
            }
            LOG.info("Running OrderedWordCount DAG" + ", dagIndex=" + dagIndex + ", inputPath=" + inputPath + ", outputPath=" + outputPath);
            Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
            DAG dag = instance.createDAG(fs, tezConf, localResources, stagingDir, dagIndex, inputPath, outputPath, generateSplitsInClient, useMRSettings, intermediateNumReduceTasks, maxDataLengthThroughIPC, exceedDataLimit);
            String callerType = "TestOrderedWordCount";
            String callerId = tezSession.getAppMasterApplicationId() == null ? ("UnknownApp_" + System.currentTimeMillis() + dagIndex) : (tezSession.getAppMasterApplicationId().toString() + "_" + dagIndex);
            dag.setCallerContext(CallerContext.create("Tez", callerId, callerType, "TestOrderedWordCount Job"));
            // Pre-warm only before the first DAG of a session, and only if a positive
            // container count is configured.
            boolean doPreWarm = dagIndex == 1 && useTezSession && conf.getBoolean("PRE_WARM_SESSION", true);
            int preWarmNumContainers = 0;
            if (doPreWarm) {
                preWarmNumContainers = conf.getInt("PRE_WARM_NUM_CONTAINERS", 0);
                if (preWarmNumContainers <= 0) {
                    doPreWarm = false;
                }
            }
            if (doPreWarm) {
                LOG.info("Pre-warming Session");
                // The pre-warm vertex copies resources, local files, environment and launch
                // options from the "initialmap" vertex of the DAG.
                PreWarmVertex preWarmVertex = PreWarmVertex.create("PreWarm", preWarmNumContainers, dag.getVertex("initialmap").getTaskResource());
                preWarmVertex.addTaskLocalFiles(dag.getVertex("initialmap").getTaskLocalFiles());
                preWarmVertex.setTaskEnvironment(dag.getVertex("initialmap").getTaskEnvironment());
                preWarmVertex.setTaskLaunchCmdOpts(dag.getVertex("initialmap").getTaskLaunchCmdOpts());
                tezSession.preWarm(preWarmVertex);
            }
            if (useTezSession) {
                LOG.info("Waiting for TezSession to get into ready state");
                waitForTezSessionReady(tezSession);
                LOG.info("Submitting DAG to Tez Session, dagIndex=" + dagIndex);
                dagClient = tezSession.submitDAG(dag);
                LOG.info("Submitted DAG to Tez Session, dagIndex=" + dagIndex);
            } else {
                LOG.info("Submitting DAG as a new Tez Application");
                dagClient = tezSession.submitDAG(dag);
            }
            // Phase 1: poll until the DAG is RUNNING or has already reached a terminal state.
            while (true) {
                dagStatus = dagClient.getDAGStatus(statusGetOpts);
                if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
                    break;
                }
                try {
                    Thread.sleep(500);
                } catch (InterruptedException e) {
                // continue;
                }
            }
            // Phase 2: poll once a second, printing progress, until a terminal state.
            while (dagStatus.getState() != DAGStatus.State.SUCCEEDED && dagStatus.getState() != DAGStatus.State.FAILED && dagStatus.getState() != DAGStatus.State.KILLED && dagStatus.getState() != DAGStatus.State.ERROR) {
                if (dagStatus.getState() == DAGStatus.State.RUNNING) {
                    ExampleDriver.printDAGStatus(dagClient, vNames);
                }
                try {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                    // continue;
                    }
                    dagStatus = dagClient.getDAGStatus(statusGetOpts);
                } catch (TezException e) {
                    LOG.error("Failed to get application progress. Exiting");
                    return -1;
                }
            }
            ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
            LOG.info("DAG " + dagIndex + " completed. " + "FinalState=" + dagStatus.getState());
            if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
                LOG.info("DAG " + dagIndex + " diagnostics: " + dagStatus.getDiagnostics());
            }
        }
    } catch (Exception e) {
        LOG.error("Error occurred when submitting/running DAGs", e);
        throw e;
    } finally {
        // Stop the session even if deleting the staging directory fails.
        try {
            if (!retainStagingDir) {
                pathFs.delete(stagingDir, true);
            }
        } finally {
            LOG.info("Shutting down session");
            tezSession.stop();
        }
    }
    if (!useTezSession) {
        ExampleDriver.printDAGStatus(dagClient, vNames);
        LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    }
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
Also used : TezException(org.apache.tez.dag.api.TezException) FileAlreadyExistsException(org.apache.hadoop.mapred.FileAlreadyExistsException) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) HadoopShim(org.apache.tez.hadoop.shim.HadoopShim) ArrayList(java.util.ArrayList) HadoopShimsLoader(org.apache.tez.hadoop.shim.HadoopShimsLoader) TezClient(org.apache.tez.client.TezClient) PreWarmVertex(org.apache.tez.dag.api.PreWarmVertex) FileSystem(org.apache.hadoop.fs.FileSystem) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Path(org.apache.hadoop.fs.Path) DAG(org.apache.tez.dag.api.DAG) TreeMap(java.util.TreeMap) FileAlreadyExistsException(org.apache.hadoop.mapred.FileAlreadyExistsException) ParseException(org.apache.commons.cli.ParseException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) SplitsInClientOptionParser(org.apache.tez.mapreduce.examples.helpers.SplitsInClientOptionParser) DAGClient(org.apache.tez.dag.api.client.DAGClient) ParseException(org.apache.commons.cli.ParseException) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)

Example 2 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project tez by apache.

the class ExampleDriver method printDAGStatus.

/**
 * Prints the state and progress of a DAG and of the named vertices to standard output,
 * optionally followed by their counters.
 *
 * <p>Vertex lines are only printed when the DAG reports a non-null progress object.
 *
 * @param dagClient             client used to fetch DAG and vertex status
 * @param vertexNames           names of the vertices to report on
 * @param displayDAGCounters    whether to request and print DAG-level counters
 * @param displayVertexCounters whether to request and print per-vertex counters
 * @throws IOException  propagated from the status calls on {@code dagClient}
 * @throws TezException propagated from the status calls on {@code dagClient}
 */
public static void printDAGStatus(DAGClient dagClient, String[] vertexNames, boolean displayDAGCounters, boolean displayVertexCounters) throws IOException, TezException {
    Set<StatusGetOpts> opts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
    // Counters are only requested when they will actually be printed.
    DAGStatus dagStatus = dagClient.getDAGStatus((displayDAGCounters ? opts : null));
    Progress progress = dagStatus.getDAGProgress();
    if (progress != null) {
        System.out.println("");
        // Treat a zero (or negative) task total as 0% so the division cannot yield NaN.
        System.out.println("DAG: State: " + dagStatus.getState() + " Progress: " + (progress.getTotalTaskCount() <= 0 ? formatter.format(0.0f) : formatter.format((double) (progress.getSucceededTaskCount()) / progress.getTotalTaskCount())));
        for (String vertexName : vertexNames) {
            VertexStatus vStatus = dagClient.getVertexStatus(vertexName, (displayVertexCounters ? opts : null));
            if (vStatus == null) {
                System.out.println("Could not retrieve status for vertex: " + vertexName);
                continue;
            }
            Progress vProgress = vStatus.getProgress();
            if (vProgress != null) {
                // A vertex with zero tasks is reported as complete; a negative total
                // leaves the progress at 0.
                double vProgressFloat = 0.0f;
                if (vProgress.getTotalTaskCount() == 0) {
                    vProgressFloat = 1.0f;
                } else if (vProgress.getTotalTaskCount() > 0) {
                    vProgressFloat = (double) vProgress.getSucceededTaskCount() / vProgress.getTotalTaskCount();
                }
                // The vertex named "ivertex1" is displayed as "intermediate-reducer".
                System.out.println("VertexStatus:" + " VertexName: " + (vertexName.equals("ivertex1") ? "intermediate-reducer" : vertexName) + " Progress: " + formatter.format(vProgressFloat));
            }
            if (displayVertexCounters) {
                TezCounters counters = vStatus.getVertexCounters();
                if (counters != null) {
                    System.out.println("Vertex Counters for " + vertexName + ": " + counters);
                }
            }
        }
    }
    if (displayDAGCounters) {
        TezCounters counters = dagStatus.getDAGCounters();
        if (counters != null) {
            System.out.println("DAG Counters: " + counters);
        }
    }
}
Also used : Progress(org.apache.tez.dag.api.client.Progress) VertexStatus(org.apache.tez.dag.api.client.VertexStatus) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 3 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project tez by apache.

the class JoinValidate method runJob.

/**
 * Builds and runs the validation DAG over the two input directories and reports whether
 * they are equivalent, judged by the missing-key counter of the finished DAG.
 *
 * @param args      {@code args[0]} = left-hand directory, {@code args[1]} = right-hand
 *                  directory, optional {@code args[2]} = number of partitions (default 1)
 * @param tezConf   configuration passed to DAG construction
 * @param tezClient client used to submit the DAG
 * @return 0 if the two sides are equivalent; 4 if the partition count is not positive;
 *         -1 if the DAG did not succeed; -2 if the counter could not be read;
 *         -3 if the counter is non-zero (the sides differ)
 * @throws Exception propagated from DAG creation, submission or status retrieval
 */
@Override
protected int runJob(String[] args, TezConfiguration tezConf, TezClient tezClient) throws Exception {
    LOG.info("Running JoinValidate");
    String lhsDir = args[0];
    String rhsDir = args[1];
    int numPartitions = 1;
    if (args.length == 3) {
        numPartitions = Integer.parseInt(args[2]);
    }
    if (numPartitions <= 0) {
        System.err.println("NumPartitions must be > 0");
        return 4;
    }
    Path lhsPath = new Path(lhsDir);
    Path rhsPath = new Path(rhsDir);
    DAG dag = createDag(tezConf, lhsPath, rhsPath, numPartitions);
    tezClient.waitTillReady();
    DAGClient dagClient = tezClient.submitDAG(dag);
    Set<StatusGetOpts> getOpts = Sets.newHashSet();
    if (isCountersLog()) {
        getOpts.add(StatusGetOpts.GET_COUNTERS);
    }
    DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(getOpts);
    if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
        LOG.info("DAG diagnostics: " + dagStatus.getDiagnostics());
        return -1;
    }
    // Counters are always needed for the verdict, so fetch the status again with
    // GET_COUNTERS regardless of what was requested while waiting.
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
    // Guard against the status carrying no counters at all, not just a missing counter.
    TezCounter counter = dagStatus.getDAGCounters() == null ? null : dagStatus.getDAGCounters().findCounter(COUNTER_GROUP_NAME, MISSING_KEY_COUNTER_NAME);
    if (counter == null) {
        LOG.info("Unable to determine equality");
        return -2;
    }
    if (counter.getValue() != 0) {
        LOG.info("Validate failed. The two sides are not equivalent");
        return -3;
    }
    LOG.info("Validation successful. The two sides are equivalent");
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) DAGClient(org.apache.tez.dag.api.client.DAGClient) DAG(org.apache.tez.dag.api.DAG) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) TezCounter(org.apache.tez.common.counters.TezCounter)

Example 4 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project tez by apache.

the class TezExampleBase method runDag.

/**
 * Submits a DAG through the internal Tez client, blocks until it finishes, and reports
 * the outcome as an exit code. A caller context is attached to the DAG before submission.
 *
 * @param dag           the DAG to submit and wait for
 * @param printCounters when {@code true}, counters are requested with the status updates
 * @param logger        receives the DAG diagnostics if the DAG does not succeed
 * @return 0 when the DAG ends in SUCCEEDED, -1 for any other final state
 * @throws TezException         declared by the Tez client calls used here
 * @throws InterruptedException declared by the Tez client calls used here
 * @throws IOException          declared by the Tez client calls used here
 */
public int runDag(DAG dag, boolean printCounters, Logger logger) throws TezException, InterruptedException, IOException {
    tezClientInternal.waitTillReady();

    CallerContext ctx = CallerContext.create("TezExamples", "Tez Example DAG: " + dag.getName());
    ApplicationId applicationId = tezClientInternal.getAppMasterApplicationId();

    // Create the Hadoop shim on first use, falling back to an empty configuration.
    if (hadoopShim == null) {
        Configuration shimConf;
        if (getConf() != null) {
            shimConf = getConf();
        } else {
            shimConf = new Configuration(false);
        }
        hadoopShim = new HadoopShimsLoader(shimConf).getHadoopShim();
    }

    // Only tag the caller context with the application id once one is known.
    if (applicationId != null) {
        TezUtilsInternal.setHadoopCallerContext(hadoopShim, applicationId);
        ctx.setCallerIdAndType(applicationId.toString(), "TezExampleApplication");
    }
    dag.setCallerContext(ctx);

    DAGClient client = tezClientInternal.submitDAG(dag);

    Set<StatusGetOpts> statusOpts = Sets.newHashSet();
    if (printCounters) {
        statusOpts.add(StatusGetOpts.GET_COUNTERS);
    }

    DAGStatus finalStatus = client.waitForCompletionWithStatusUpdates(statusOpts);
    if (finalStatus.getState() == DAGStatus.State.SUCCEEDED) {
        return 0;
    }
    logger.info("DAG diagnostics: " + finalStatus.getDiagnostics());
    return -1;
}
Also used : CallerContext(org.apache.tez.client.CallerContext) Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) DAGClient(org.apache.tez.dag.api.client.DAGClient) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) HadoopShimsLoader(org.apache.tez.hadoop.shim.HadoopShimsLoader)

Example 5 with StatusGetOpts

use of org.apache.tez.dag.api.client.StatusGetOpts in project hive by apache.

the class FSCountersSummary method print.

/**
 * Prints a "FileSystem Counters Summary" to the console: for every file system scheme
 * that has statistics, a header followed by one summary line per vertex (vertices in
 * sorted name order) for which counters could be obtained.
 *
 * @param console the log helper that receives the output
 */
@Override
public void print(SessionState.LogHelper console) {
    console.printInfo("FileSystem Counters Summary");
    SortedSet<String> keys = new TreeSet<>(progressMap.keySet());
    Set<StatusGetOpts> statusOptions = Collections.singleton(StatusGetOpts.GET_COUNTERS);
    // NOTE(review): the start of this comment is missing from the source; presumably it
    // assumes the schemes reported by FileSystem.getAllStatistics() here cover the task side
    // as well. If not, we need a way to get all the schemes that are accessed by the tez task/llap.
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        // Upper-case with a fixed locale so the result does not depend on the JVM's default
        // locale (e.g. Turkish would map "i" to a dotted capital I).
        final String scheme = statistics.getScheme().toUpperCase(java.util.Locale.ROOT);
        console.printInfo("");
        console.printInfo("Scheme: " + scheme);
        console.printInfo(SEPARATOR);
        console.printInfo(HEADER);
        console.printInfo(SEPARATOR);
        for (String vertexName : keys) {
            TezCounters vertexCounters = vertexCounters(statusOptions, vertexName);
            if (vertexCounters != null) {
                console.printInfo(summary(scheme, vertexName, vertexCounters));
            }
        }
        console.printInfo(SEPARATOR);
    }
}
Also used : StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) FileSystem(org.apache.hadoop.fs.FileSystem) TezCounters(org.apache.tez.common.counters.TezCounters)

Aggregations

StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts)13 TezCounters (org.apache.tez.common.counters.TezCounters)8 TezCounter (org.apache.tez.common.counters.TezCounter)6 DAGClient (org.apache.tez.dag.api.client.DAGClient)6 Path (org.apache.hadoop.fs.Path)5 DAG (org.apache.tez.dag.api.DAG)5 DAGStatus (org.apache.tez.dag.api.client.DAGStatus)5 IOException (java.io.IOException)4 TezException (org.apache.tez.dag.api.TezException)4 VertexStatus (org.apache.tez.dag.api.client.VertexStatus)4 Context (org.apache.hadoop.hive.ql.Context)3 CounterGroup (org.apache.tez.common.counters.CounterGroup)3 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)3 Progress (org.apache.tez.dag.api.client.Progress)3 Configuration (org.apache.hadoop.conf.Configuration)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 MappingInput (org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput)2 TezJobMonitor (org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)2