
Example 36 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project flink by apache.

From class HadoopOutputFormatBase, method open:

/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException Thrown if the output could not be opened.
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }
        this.taskNumber = taskNumber + 1;
        // for hadoop 2.2
        this.configuration.set("mapreduce.output.basename", "tmp");
        // Zero-pad the 1-based task number to six digits so the string parses as a
        // valid attempt id; see the sketch after this example.
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_" + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0") + Integer.toString(taskNumber + 1) + "_0");
        this.configuration.set("mapred.task.id", taskAttemptID.toString());
        this.configuration.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.configuration.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.configuration.setInt("mapreduce.task.partition", taskNumber + 1);
        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.configuration, taskAttemptID);
            this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
            this.outputCommitter.setupJob(HadoopUtils.instantiateJobContext(this.configuration, new JobID()));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        this.context.getCredentials().addAll(this.credentials);
        Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
        if (currentUserCreds != null) {
            this.context.getCredentials().addAll(currentUserCreds);
        }
        // compatibility with Hadoop 2.2.0: the temporary output directory differs from Hadoop 1.2.1
        if (outputCommitter instanceof FileOutputCommitter) {
            this.configuration.set("mapreduce.task.output.dir", ((FileOutputCommitter) this.outputCommitter).getWorkPath().toString());
        }
        try {
            this.recordWriter = this.mapreduceOutputFormat.getRecordWriter(this.context);
        } catch (InterruptedException e) {
            throw new IOException("Could not create RecordWriter.", e);
        }
    }
}
Also used: TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) FileOutputCommitter(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter) IOException(java.io.IOException) JobID(org.apache.hadoop.mapreduce.JobID) Credentials(org.apache.hadoop.security.Credentials)
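
The pad-and-replace expression in open() above is dense. A minimal sketch of the same construction, assuming a standalone helper (buildAttemptId is our name, not Flink's): String.format's %06d specifier produces exactly the zero-padded, 1-based task number that the manual padding builds.

import org.apache.hadoop.mapreduce.TaskAttemptID;

// Hypothetical helper, not part of Flink: builds the same attempt id string
// that open() assembles by hand.
public final class AttemptIdSketch {

    static TaskAttemptID buildAttemptId(int taskNumber) {
        // %06d left-pads the 1-based task number with zeros to six digits.
        return TaskAttemptID.forName(
                String.format("attempt__0000_r_%06d_0", taskNumber + 1));
    }

    public static void main(String[] args) {
        System.out.println(buildAttemptId(4)); // attempt__0000_r_000005_0
    }
}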

Example 37 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project druid by druid-io.

From class HadoopConverterJob, method run:

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        // The third argument records the source of the setting, which appears in
        // Configuration's property-origin tracking when debugging.
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);
    // Map only. Number of map tasks determined by input format
    jobConf.setNumReduceTasks(0);
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));
    setJobName(jobConf, segments);
    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }
    final Job job = Job.getInstance(jobConf);
    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);
    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()), JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())), job);
    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(), Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();
        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList.copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {

            @Nullable
            @Override
            public DataSegment apply(final Path input) {
                try {
                    if (!fs.exists(input)) {
                        throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]", ConvertingOutputFormat.DATA_SUCCESS_KEY, ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                    }
                } catch (final IOException e) {
                    throw Throwables.propagate(e);
                }
                try (final InputStream stream = fs.open(input)) {
                    return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                } catch (final IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE("Tasks reported success but result length did not match! Expected %d found %d at path [%s]", segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
Also used: ArrayList(java.util.ArrayList) DataSegment(io.druid.timeline.DataSegment) WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) Function(com.google.common.base.Function) FileSystem(org.apache.hadoop.fs.FileSystem) ISE(io.druid.java.util.common.ISE) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) Path(org.apache.hadoop.fs.Path) TaskReport(org.apache.hadoop.mapreduce.TaskReport) InputStream(java.io.InputStream) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) IAE(io.druid.java.util.common.IAE) Map(java.util.Map) JobID(org.apache.hadoop.mapreduce.JobID)
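
The example above derives a per-job directory from job.getJobID(). A small sketch of JobID's string form, which is what makes it usable as a path component (the /tmp/converter base and the directory layout are assumptions; Druid's actual getJobPath() may differ):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobID;

public class JobIdPathSketch {
    public static void main(String[] args) {
        JobID jobID = new JobID("20230101", 42);
        // toString() yields "job_<jtIdentifier>_<id>" with the id zero-padded to four digits.
        System.out.println(jobID);                            // job_20230101_0042
        // forName() parses the string form back into an equal JobID.
        System.out.println(JobID.forName(jobID.toString()));  // job_20230101_0042

        // An illustrative per-job scratch directory keyed by the job id.
        Path jobDir = new Path(new Path("/tmp/converter"), jobID.toString());
        System.out.println(jobDir);                           // /tmp/converter/job_20230101_0042
    }
}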

Example 38 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.

From class CommitterEventHandler, method serviceInit:

@Override
protected void serviceInit(Configuration conf) throws Exception {
    super.serviceInit(conf);
    commitThreadCancelTimeoutMs = conf.getInt(MRJobConfig.MR_AM_COMMITTER_CANCEL_TIMEOUT_MS, MRJobConfig.DEFAULT_MR_AM_COMMITTER_CANCEL_TIMEOUT_MS);
    commitWindowMs = conf.getLong(MRJobConfig.MR_AM_COMMIT_WINDOW_MS, MRJobConfig.DEFAULT_MR_AM_COMMIT_WINDOW_MS);
    try {
        fs = FileSystem.get(conf);
        JobID id = TypeConverter.fromYarn(context.getApplicationID());
        JobId jobId = TypeConverter.toYarn(id);
        String user = UserGroupInformation.getCurrentUser().getShortUserName();
        startCommitFile = MRApps.getStartJobCommitFile(conf, user, jobId);
        endCommitSuccessFile = MRApps.getEndJobCommitSuccessFile(conf, user, jobId);
        endCommitFailureFile = MRApps.getEndJobCommitFailureFile(conf, user, jobId);
    } catch (IOException e) {
        throw new YarnRuntimeException(e);
    }
}
Also used: YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) IOException(java.io.IOException) JobID(org.apache.hadoop.mapreduce.JobID) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId)
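
serviceInit() converts a YARN ApplicationId into the classic JobID and then into the MRv2 JobId record. A round-trip sketch, with made-up timestamp and sequence values:

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.yarn.api.records.ApplicationId;

public class TypeConverterSketch {
    public static void main(String[] args) {
        // Cluster timestamp and sequence number are arbitrary example values.
        ApplicationId appId = ApplicationId.newInstance(1485983253427L, 5);
        JobID id = TypeConverter.fromYarn(appId);  // classic org.apache.hadoop.mapreduce.JobID
        JobId jobId = TypeConverter.toYarn(id);    // MRv2 record used by the AM-side APIs
        System.out.println(id);                    // job_1485983253427_0005
        System.out.println(jobId.getAppId());      // application_1485983253427_0005
    }
}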

Example 39 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.

From class RMCommunicator, method serviceStart:

@Override
protected void serviceStart() throws Exception {
    scheduler = createSchedulerProxy();
    JobID id = TypeConverter.fromYarn(this.applicationId);
    JobId jobId = TypeConverter.toYarn(id);
    job = context.getJob(jobId);
    register();
    startAllocatorThread();
    super.serviceStart();
}
Also used: JobID(org.apache.hadoop.mapreduce.JobID) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId)

Example 40 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.

From class TestEvents, method testTaskAttemptFinishedEvent:

/**
   * Tests the getters of TaskAttemptFinishedEvent and TaskAttemptFinished.
   *
   * @throws Exception
   */
@Test(timeout = 10000)
public void testTaskAttemptFinishedEvent() throws Exception {
    JobID jid = new JobID("001", 1);
    TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);
    TaskAttemptID taskAttemptId = new TaskAttemptID(tid, 3);
    Counters counters = new Counters();
    TaskAttemptFinishedEvent test = new TaskAttemptFinishedEvent(taskAttemptId, TaskType.REDUCE, "TEST", 123L, "RACKNAME", "HOSTNAME", "STATUS", counters);
    assertEquals(test.getAttemptId().toString(), taskAttemptId.toString());
    assertEquals(test.getCounters(), counters);
    assertEquals(test.getFinishTime(), 123L);
    assertEquals(test.getHostname(), "HOSTNAME");
    assertEquals(test.getRackName(), "RACKNAME");
    assertEquals(test.getState(), "STATUS");
    assertEquals(test.getTaskId(), tid);
    assertEquals(test.getTaskStatus(), "TEST");
    assertEquals(test.getTaskType(), TaskType.REDUCE);
}
Also used: TaskID(org.apache.hadoop.mapreduce.TaskID) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) Counters(org.apache.hadoop.mapreduce.Counters) JobID(org.apache.hadoop.mapreduce.JobID) Test(org.junit.Test)
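
For reference, a sketch printing the string forms of the ids this test builds, which is handy when matching them against job history files or logs:

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class IdHierarchySketch {
    public static void main(String[] args) {
        JobID jid = new JobID("001", 1);
        TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);
        TaskAttemptID taskAttemptId = new TaskAttemptID(tid, 3);
        System.out.println(jid);            // job_001_0001
        System.out.println(tid);            // task_001_0001_r_000002
        System.out.println(taskAttemptId);  // attempt_001_0001_r_000002_3
    }
}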

Aggregations

JobID (org.apache.hadoop.mapreduce.JobID): 61
Test (org.junit.Test): 33
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 17
IOException (java.io.IOException): 16
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 16
TaskID (org.apache.hadoop.mapreduce.TaskID): 16
Configuration (org.apache.hadoop.conf.Configuration): 12
Job (org.apache.hadoop.mapreduce.Job): 8
ArrayList (java.util.ArrayList): 7
Path (org.apache.hadoop.fs.Path): 7
EventHandler (org.apache.hadoop.yarn.event.EventHandler): 7
HashMap (java.util.HashMap): 6
FileSystem (org.apache.hadoop.fs.FileSystem): 6
JobConf (org.apache.hadoop.mapred.JobConf): 6
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo): 6
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 5
Event (org.apache.hadoop.mapreduce.jobhistory.Event): 5
EventType (org.apache.hadoop.mapreduce.jobhistory.EventType): 5
JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent): 5
JobHistoryEventHandler (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler): 5