
Example 76 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project oozie by apache.

the class MapReduceActionExecutor method end.

@Override
public void end(Context context, WorkflowAction action) throws ActionExecutorException {
    super.end(context, action);
    JobClient jobClient = null;
    boolean exception = false;
    try {
        if (action.getStatus() == WorkflowAction.Status.OK) {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            Configuration jobConf = createBaseHadoopConf(context, actionXml);
            jobClient = createJobClient(context, jobConf);
            RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalChildIDs()));
            if (runningJob == null) {
                throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "MR002", "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", action.getExternalChildIDs(), action.getId());
            }
            Counters counters = runningJob.getCounters();
            if (counters != null) {
                ActionStats stats = new MRStats(counters);
                String statsJsonString = stats.toJSON();
                context.setVar(HADOOP_COUNTERS, statsJsonString);
                // store the action stats only if external stats writing is enabled and the JSON fits within the size limit
                if (Boolean.parseBoolean(evaluateConfigurationProperty(actionXml, OOZIE_ACTION_EXTERNAL_STATS_WRITE, "false")) && (statsJsonString.getBytes().length <= getMaxExternalStatsSize())) {
                    context.setExecutionStats(statsJsonString);
                    log.debug("Printing stats for Map-Reduce action as a JSON string : [{0}]", statsJsonString);
                }
            } else {
                context.setVar(HADOOP_COUNTERS, "");
                XLog.getLog(getClass()).warn("Could not find Hadoop Counters for: [{0}]", action.getExternalChildIDs());
            }
        }
    } catch (Exception ex) {
        exception = true;
        throw convertException(ex);
    } finally {
        if (jobClient != null) {
            try {
                jobClient.close();
            } catch (Exception e) {
                if (exception) {
                    log.error("JobClient error: ", e);
                } else {
                    throw convertException(e);
                }
            }
        }
    }
}
Also used : XConfiguration(org.apache.oozie.util.XConfiguration) Configuration(org.apache.hadoop.conf.Configuration) Element(org.jdom.Element) RunningJob(org.apache.hadoop.mapred.RunningJob) ActionExecutorException(org.apache.oozie.action.ActionExecutorException) Counters(org.apache.hadoop.mapred.Counters) JobClient(org.apache.hadoop.mapred.JobClient) IOException(java.io.IOException)
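
A minimal standalone sketch of the same lookup pattern, using only the plain org.apache.hadoop.mapred API: fetch a finished job by its ID through a JobClient and walk its counter groups. The class name, job ID string, and default JobConf below are placeholders, not part of the Oozie code above.

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;

public class CounterDump {
    // Sketch: look up a job by ID and print every counter it reported.
    public static void dump(String jobIdString) throws Exception {
        // Placeholder configuration; a real caller would point this at the cluster.
        JobClient jobClient = new JobClient(new JobConf());
        try {
            RunningJob job = jobClient.getJob(JobID.forName(jobIdString));
            if (job == null) {
                System.err.println("Unknown Hadoop job: " + jobIdString);
                return;
            }
            Counters counters = job.getCounters();
            if (counters == null) {
                return; // counters may already have been purged
            }
            for (Counters.Group group : counters) {
                for (Counters.Counter counter : group) {
                    System.out.println(group.getName() + "." + counter.getName() + " = " + counter.getValue());
                }
            }
        } finally {
            jobClient.close();
        }
    }
}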

Example 77 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project oozie by apache.

the class SqoopActionExecutor method end.

/**
 * Gathers counters from all Hadoop jobs executed by the action (i.e. the jobs
 * that moved data, not the launcher itself) and merges them together. Most of
 * the time there is only one such job; the exception is the import-all-tables
 * option, which runs one job per table.
 *
 * @param context Action context
 * @param action Workflow action
 * @throws ActionExecutorException
 */
@Override
public void end(Context context, WorkflowAction action) throws ActionExecutorException {
    super.end(context, action);
    JobClient jobClient = null;
    boolean exception = false;
    try {
        if (action.getStatus() == WorkflowAction.Status.OK) {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            Configuration jobConf = createBaseHadoopConf(context, actionXml);
            jobClient = createJobClient(context, jobConf);
            // Cumulative counters for all Sqoop mapreduce jobs
            Counters counters = null;
            // Sqoop does not necessarily create a MapReduce job on every invocation
            String externalIds = action.getExternalChildIDs();
            if (externalIds != null && !externalIds.trim().isEmpty()) {
                String[] jobIds = externalIds.split(",");
                for (String jobId : jobIds) {
                    RunningJob runningJob = jobClient.getJob(JobID.forName(jobId));
                    if (runningJob == null) {
                        throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "SQOOP001", "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", action.getExternalId(), action.getId());
                    }
                    Counters taskCounters = runningJob.getCounters();
                    if (taskCounters != null) {
                        if (counters == null) {
                            counters = taskCounters;
                        } else {
                            counters.incrAllCounters(taskCounters);
                        }
                    } else {
                        XLog.getLog(getClass()).warn("Could not find Hadoop Counters for job: [{0}]", jobId);
                    }
                }
            }
            if (counters != null) {
                ActionStats stats = new MRStats(counters);
                String statsJsonString = stats.toJSON();
                context.setVar(MapReduceActionExecutor.HADOOP_COUNTERS, statsJsonString);
                // store the action stats only if external stats writing is enabled and the JSON fits within the size limit
                if (Boolean.parseBoolean(evaluateConfigurationProperty(actionXml, OOZIE_ACTION_EXTERNAL_STATS_WRITE, "true")) && (statsJsonString.getBytes().length <= getMaxExternalStatsSize())) {
                    context.setExecutionStats(statsJsonString);
                    LOG.debug("Printing stats for sqoop action as a JSON string : [{0}]", statsJsonString);
                }
            } else {
                context.setVar(MapReduceActionExecutor.HADOOP_COUNTERS, "");
                XLog.getLog(getClass()).warn("Can't find any associated Hadoop job counters");
            }
        }
    } catch (Exception ex) {
        exception = true;
        throw convertException(ex);
    } finally {
        if (jobClient != null) {
            try {
                jobClient.close();
            } catch (Exception e) {
                if (exception) {
                    LOG.error("JobClient error: ", e);
                } else {
                    throw convertException(e);
                }
            }
        }
    }
}
Also used : XConfiguration(org.apache.oozie.util.XConfiguration) Configuration(org.apache.hadoop.conf.Configuration) Element(org.jdom.Element) RunningJob(org.apache.hadoop.mapred.RunningJob) ActionExecutorException(org.apache.oozie.action.ActionExecutorException) Counters(org.apache.hadoop.mapred.Counters) JobClient(org.apache.hadoop.mapred.JobClient) IOException(java.io.IOException)
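
The merge step above relies on Counters.incrAllCounters, which adds every counter of its argument into the receiving Counters object. A minimal sketch of the same accumulation over a comma-separated list of job IDs, assuming an already-created JobClient; the class and method names here are illustrative, not part of Oozie.

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;

public class CounterMerge {
    // Sketch: accumulate counters from several finished jobs into one Counters object.
    public static Counters merge(JobClient jobClient, String externalIds) throws Exception {
        Counters total = null;
        for (String jobId : externalIds.split(",")) {
            RunningJob job = jobClient.getJob(JobID.forName(jobId.trim()));
            if (job == null) {
                continue; // the real executor fails the action on an unknown job
            }
            Counters jobCounters = job.getCounters();
            if (jobCounters == null) {
                continue; // counters can be missing once the job history is gone
            }
            if (total == null) {
                total = jobCounters;
            } else {
                total.incrAllCounters(jobCounters); // element-wise addition of all groups
            }
        }
        return total;
    }
}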

Example 78 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project oozie by apache.

the class TestActionCheckXCommand method testActionCheckTransientDuringLauncher.

public void testActionCheckTransientDuringLauncher() throws Exception {
    // When using YARN, skip this test because it relies on shutting down the job tracker, which isn't used in YARN
    if (createJobConf().get("yarn.resourcemanager.address") != null) {
        return;
    }
    services.destroy();
    // Make the max number of retries lower so the test won't take as long
    final int maxRetries = 2;
    setSystemProperty("oozie.action.retries.max", Integer.toString(maxRetries));
    services = new Services();
    // Disable ActionCheckerService so it doesn't interfere by triggering any extra ActionCheckXCommands
    setClassesToBeExcluded(services.getConf(), new String[] { "org.apache.oozie.service.ActionCheckerService" });
    services.init();
    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job0 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    final String jobId = job0.getId();
    WorkflowActionBean action0 = this.addRecordToWfActionTable(jobId, "1", WorkflowAction.Status.PREP);
    final String actionId = action0.getId();
    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(actionId);
    new ActionStartXCommand(actionId, "map-reduce").call();
    final WorkflowActionBean action1 = jpaService.execute(wfActionGetCmd);
    String originalLauncherId = action1.getExternalId();
    // At this point, the launcher job has started (but not finished)
    // Now, shutdown the job tracker to pretend it has gone down during the launcher job
    executeWhileJobTrackerIsShutdown(new ShutdownJobTrackerExecutable() {

        @Override
        public void execute() throws Exception {
            assertEquals(0, action1.getRetries());
            new ActionCheckXCommand(actionId).call();
            waitFor(30 * 1000, new Predicate() {

                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() > 0);
                }
            });
            waitFor(180 * 1000, new Predicate() {

                @Override
                public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() == 0);
                }
            });
            WorkflowActionBean action1b = jpaService.execute(wfActionGetCmd);
            assertEquals(0, action1b.getRetries());
            assertEquals("START_MANUAL", action1b.getStatusStr());
            WorkflowJobBean job1 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
            assertEquals("SUSPENDED", job1.getStatusStr());
        // At this point, the action has hit a transient error even after maxRetries attempts,
        // so the workflow has been SUSPENDED
        }
    });
    // Now, let's bring the job tracker back up and resume the workflow (which will restart the current action)
    // It should now continue and finish with SUCCEEDED
    new ResumeXCommand(jobId).call();
    WorkflowJobBean job2 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
    assertEquals("RUNNING", job2.getStatusStr());
    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job2, action1, false, false);
    WorkflowActionBean action2 = jpaService.execute(wfActionGetCmd);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    Configuration conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action2.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);
    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action3 = jpaService.execute(wfActionGetCmd);
    String launcherId = action3.getExternalId();
    assertFalse(originalLauncherId.equals(launcherId));
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));
    waitFor(120 * 1000, new Predicate() {

        @Override
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherHelper.hasIdSwap(actionData));
    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action4 = jpaService.execute(wfActionGetCmd);
    String mapperId = action4.getExternalId();
    String childId = action4.getExternalChildIDs();
    assertTrue(launcherId.equals(mapperId));
    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));
    waitFor(120 * 1000, new Predicate() {

        @Override
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());
    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action5 = jpaService.execute(wfActionGetCmd);
    assertEquals("SUCCEEDED", action5.getExternalStatus());
}
Also used : WorkflowJobGetJPAExecutor(org.apache.oozie.executor.jpa.WorkflowJobGetJPAExecutor) Configuration(org.apache.hadoop.conf.Configuration) WorkflowJobBean(org.apache.oozie.WorkflowJobBean) JobClient(org.apache.hadoop.mapred.JobClient) HadoopAccessorService(org.apache.oozie.service.HadoopAccessorService) WorkflowActionBean(org.apache.oozie.WorkflowActionBean) JPAExecutorException(org.apache.oozie.executor.jpa.JPAExecutorException) ActionExecutorException(org.apache.oozie.action.ActionExecutorException) Services(org.apache.oozie.service.Services) RunningJob(org.apache.hadoop.mapred.RunningJob) WorkflowActionGetJPAExecutor(org.apache.oozie.executor.jpa.WorkflowActionGetJPAExecutor) MapReduceActionExecutor(org.apache.oozie.action.hadoop.MapReduceActionExecutor) JPAService(org.apache.oozie.service.JPAService) ActionExecutorContext(org.apache.oozie.command.wf.ActionXCommand.ActionExecutorContext)
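
Outside the Oozie test harness, the waitFor/isComplete checks above reduce to a plain polling loop over RunningJob.isComplete() followed by RunningJob.isSuccessful(). A minimal sketch; the class name, timeout, and poll interval are illustrative values, not taken from the test.

import org.apache.hadoop.mapred.RunningJob;

public class JobWait {
    // Sketch: block until the job finishes or the timeout expires, then report success.
    public static boolean waitForSuccess(RunningJob job, long timeoutMs) throws Exception {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (!job.isComplete()) {
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("Timed out waiting for " + job.getID());
            }
            Thread.sleep(1000); // poll once per second
        }
        return job.isSuccessful();
    }
}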

Example 79 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project oozie by apache.

the class MapReduceMain method run.

protected void run(String[] args) throws Exception {
    System.out.println();
    System.out.println("Oozie Map-Reduce action configuration");
    System.out.println("=======================");
    // loading action conf prepared by Oozie
    Configuration actionConf = new Configuration(false);
    actionConf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
    setYarnTag(actionConf);
    JobConf jobConf = new JobConf();
    addActionConf(jobConf, actionConf);
    LauncherMain.killChildYarnJobs(jobConf);
    // Run a config class if given to update the job conf
    runConfigClass(jobConf);
    PasswordMasker passwordMasker = new PasswordMasker();
    // Temporary JobConf object; mask any possible passwords before printing the key-value pairs
    JobConf maskedJobConf = new JobConf(false);
    for (Entry<String, String> entry : jobConf) {
        maskedJobConf.set(entry.getKey(), passwordMasker.maskPasswordsIfNecessary(entry.getValue()));
    }
    logMasking("Map-Reduce job configuration:", maskedJobConf);
    File idFile = new File(System.getProperty(LauncherAMUtils.ACTION_PREFIX + LauncherAMUtils.ACTION_DATA_NEW_ID));
    System.out.println("Submitting Oozie action Map-Reduce job");
    System.out.println();
    // submitting job
    RunningJob runningJob = submitJob(jobConf);
    String jobId = runningJob.getID().toString();
    writeJobIdFile(idFile, jobId);
    System.out.println("=======================");
    System.out.println();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File)
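
MapReduceMain.submitJob is Oozie's own helper; a comparable standalone submission with the plain mapred API goes through JobClient.submitJob(JobConf), which returns a RunningJob whose ID can then be written to the id file. A minimal sketch, assuming the JobConf is already fully configured; the class and method names are illustrative.

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitAndRecordId {
    // Sketch: submit a preconfigured job and persist the new job ID for later tracking.
    public static String submit(JobConf jobConf, File idFile) throws Exception {
        JobClient jobClient = new JobClient(jobConf);
        try {
            RunningJob runningJob = jobClient.submitJob(jobConf); // returns without waiting for completion
            String jobId = runningJob.getID().toString();
            Files.write(idFile.toPath(), jobId.getBytes(StandardCharsets.UTF_8));
            return jobId;
        } finally {
            jobClient.close();
        }
    }
}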

Example 80 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project oozie by apache.

the class TestMapReduceActionExecutor method _testSubmitWithCredentials.

private void _testSubmitWithCredentials(String name, String actionXml) throws Exception {
    Context context = createContextWithCredentials(MAP_REDUCE, actionXml);
    final String launcherId = submitAction(context);
    waitUntilYarnAppDoneAndAssertSuccess(launcherId);
    Map<String, String> actionData = LauncherHelper.getActionData(getFileSystem(), context.getActionDir(), context.getProtoActionConf());
    assertTrue(LauncherHelper.hasIdSwap(actionData));
    MapReduceActionExecutor ae = new MapReduceActionExecutor();
    ae.check(context, context.getAction());
    assertTrue(launcherId.equals(context.getAction().getExternalId()));
    String externalChildIDs = context.getAction().getExternalChildIDs();
    waitUntilYarnAppDoneAndAssertSuccess(externalChildIDs);
    ae.check(context, context.getAction());
    assertEquals(JavaActionExecutor.SUCCEEDED, context.getAction().getExternalStatus());
    assertNull(context.getAction().getData());
    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
    Configuration conf = ae.createBaseHadoopConf(context, XmlUtils.parseXml(actionXml));
    String user = conf.get("user.name");
    JobClient jobClient = getHadoopAccessorService().createJobClient(user, conf);
    org.apache.hadoop.mapreduce.JobID jobID = TypeConverter.fromYarn(ConverterUtils.toApplicationId(externalChildIDs));
    final RunningJob mrJob = jobClient.getJob(JobID.downgrade(jobID));
    assertTrue(MapperReducerCredentialsForTest.hasCredentials(mrJob));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) XConfiguration(org.apache.oozie.util.XConfiguration) RunningJob(org.apache.hadoop.mapred.RunningJob) JobClient(org.apache.hadoop.mapred.JobClient)
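
The non-obvious step in this test is turning the YARN application ID stored in externalChildIDs back into an old-API JobID that JobClient.getJob accepts. A minimal sketch of that conversion using the same classes as the test; note that ConverterUtils is deprecated in newer Hadoop releases, and the class name here is illustrative.

import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.util.ConverterUtils;

public class YarnIdToJobId {
    // Sketch: "application_<cluster timestamp>_<id>" -> org.apache.hadoop.mapred.JobID
    public static JobID toMapredJobId(String yarnApplicationId) {
        ApplicationId appId = ConverterUtils.toApplicationId(yarnApplicationId);
        org.apache.hadoop.mapreduce.JobID newApiId = TypeConverter.fromYarn(appId);
        return JobID.downgrade(newApiId);
    }
}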

Aggregations

RunningJob (org.apache.hadoop.mapred.RunningJob): 93
JobConf (org.apache.hadoop.mapred.JobConf): 65
Path (org.apache.hadoop.fs.Path): 49
JobClient (org.apache.hadoop.mapred.JobClient): 33
IOException (java.io.IOException): 28
FileSystem (org.apache.hadoop.fs.FileSystem): 28
DMLConfig (org.apache.sysml.conf.DMLConfig): 27
Group (org.apache.hadoop.mapred.Counters.Group): 26
Counters (org.apache.hadoop.mapred.Counters): 17
Configuration (org.apache.hadoop.conf.Configuration): 14
MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups): 13
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 10
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 8
File (java.io.File): 6
TaggedMatrixBlock (org.apache.sysml.runtime.matrix.data.TaggedMatrixBlock): 6
DataOutputStream (java.io.DataOutputStream): 5
URI (java.net.URI): 5
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 5
Context (org.apache.hadoop.hive.ql.Context): 5
Text (org.apache.hadoop.io.Text): 5