Use of org.apache.hadoop.mapreduce.JobID in project flink by apache.
The class HadoopOutputFormatBase, method open.
/**
 * Create the temporary output file for the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }
        this.taskNumber = taskNumber + 1;
        // for hadoop 2.2
        this.configuration.set("mapreduce.output.basename", "tmp");
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");
        this.configuration.set("mapred.task.id", taskAttemptID.toString());
        this.configuration.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.configuration.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.configuration.setInt("mapreduce.task.partition", taskNumber + 1);
        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.configuration, taskAttemptID);
            this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
            this.outputCommitter.setupJob(HadoopUtils.instantiateJobContext(this.configuration, new JobID()));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        this.context.getCredentials().addAll(this.credentials);
        Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
        if (currentUserCreds != null) {
            this.context.getCredentials().addAll(currentUserCreds);
        }
        // compatibility with Hadoop 2.2.0: the temporary output directory differs from Hadoop 1.2.1
        if (outputCommitter instanceof FileOutputCommitter) {
            this.configuration.set("mapreduce.task.output.dir", ((FileOutputCommitter) this.outputCommitter).getWorkPath().toString());
        }
        try {
            this.recordWriter = this.mapreduceOutputFormat.getRecordWriter(this.context);
        } catch (InterruptedException e) {
            throw new IOException("Could not create RecordWriter.", e);
        }
    }
}
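The TaskAttemptID string above is built with a somewhat opaque pad-with-spaces-then-replace trick. A minimal standalone sketch (illustrative only, not part of HadoopOutputFormatBase) shows the equivalent zero-padding and the resulting id; the double underscore after "attempt" reflects an empty jtIdentifier:

// Sketch: equivalent to the padding in open() as long as taskNumber + 1 stays within six digits,
// which the guard at the top of open() enforces.
int taskNumber = 4; // parallel subtask index, as passed to open()
String padded = String.format("%06d", taskNumber + 1);
String attemptId = "attempt__0000_r_" + padded + "_0";
System.out.println(attemptId); // prints: attempt__0000_r_000005_0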
Use of org.apache.hadoop.mapreduce.JobID in project druid by druid-io.
The class HadoopConverterJob, method run.
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);
    // Map only. Number of map tasks determined by input format
    jobConf.setNumReduceTasks(0);
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));
    setJobName(jobConf, segments);
    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }
    final Job job = Job.getInstance(jobConf);
    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);
    JobHelper.setupClasspath(
        JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
        JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
        job
    );
    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(), Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();
        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList.copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
            @Nullable
            @Override
            public DataSegment apply(final Path input) {
                try {
                    if (!fs.exists(input)) {
                        throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]", ConvertingOutputFormat.DATA_SUCCESS_KEY, ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                    }
                } catch (final IOException e) {
                    throw Throwables.propagate(e);
                }
                try (final InputStream stream = fs.open(input)) {
                    return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                } catch (final IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE("Tasks reported success but result length did not match! Expected %d found %d at path [%s]", segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
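The anonymous Guava Function above predates Java 8 lambdas. As a minimal sketch (assuming the same goodPaths, fs, and HadoopDruidConverterConfig.jsonMapper are in scope, with the existence check omitted for brevity and java.util.stream.Collectors imported), the same path-to-DataSegment mapping could read:

// Sketch only: stream-based rewrite of the Guava transform above, not the original Druid code.
final List<DataSegment> returnList = ImmutableList.copyOf(
    goodPaths.stream()
        .map(input -> {
            // Read the serialized DataSegment written next to each success marker.
            try (final InputStream stream = fs.open(input)) {
                return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
            } catch (final IOException e) {
                throw new RuntimeException(e);
            }
        })
        .collect(Collectors.toList())
);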
Use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.
The class CommitterEventHandler, method serviceInit.
@Override
protected void serviceInit(Configuration conf) throws Exception {
    super.serviceInit(conf);
    commitThreadCancelTimeoutMs = conf.getInt(MRJobConfig.MR_AM_COMMITTER_CANCEL_TIMEOUT_MS, MRJobConfig.DEFAULT_MR_AM_COMMITTER_CANCEL_TIMEOUT_MS);
    commitWindowMs = conf.getLong(MRJobConfig.MR_AM_COMMIT_WINDOW_MS, MRJobConfig.DEFAULT_MR_AM_COMMIT_WINDOW_MS);
    try {
        fs = FileSystem.get(conf);
        JobID id = TypeConverter.fromYarn(context.getApplicationID());
        JobId jobId = TypeConverter.toYarn(id);
        String user = UserGroupInformation.getCurrentUser().getShortUserName();
        startCommitFile = MRApps.getStartJobCommitFile(conf, user, jobId);
        endCommitSuccessFile = MRApps.getEndJobCommitSuccessFile(conf, user, jobId);
        endCommitFailureFile = MRApps.getEndJobCommitFailureFile(conf, user, jobId);
    } catch (IOException e) {
        throw new YarnRuntimeException(e);
    }
}
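The fromYarn/toYarn pair bridges the classic MapReduce JobID and the YARN-side JobId record. A minimal sketch of the forward conversion (hedged: the ApplicationId values are made up, and the printed form assumes the JobID is derived from the application's cluster timestamp and sequence number):

// Sketch only: illustrates the ApplicationId -> JobID mapping used above.
ApplicationId appId = ApplicationId.newInstance(1490000000000L, 42); // hypothetical application
JobID id = TypeConverter.fromYarn(appId);
System.out.println(id); // expected: job_1490000000000_0042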
Use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.
The class RMCommunicator, method serviceStart.
@Override
protected void serviceStart() throws Exception {
    scheduler = createSchedulerProxy();
    JobID id = TypeConverter.fromYarn(this.applicationId);
    JobId jobId = TypeConverter.toYarn(id);
    job = context.getJob(jobId);
    register();
    startAllocatorThread();
    super.serviceStart();
}
Use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.
The class TestEvents, method testTaskAttemptFinishedEvent.
/**
 * Test the getters of TaskAttemptFinishedEvent and TaskAttemptFinished.
 *
 * @throws Exception
 */
@Test(timeout = 10000)
public void testTaskAttemptFinishedEvent() throws Exception {
    JobID jid = new JobID("001", 1);
    TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);
    TaskAttemptID taskAttemptId = new TaskAttemptID(tid, 3);
    Counters counters = new Counters();
    TaskAttemptFinishedEvent test = new TaskAttemptFinishedEvent(taskAttemptId, TaskType.REDUCE, "TEST", 123L, "RAKNAME", "HOSTNAME", "STATUS", counters);
    assertEquals(test.getAttemptId().toString(), taskAttemptId.toString());
    assertEquals(test.getCounters(), counters);
    assertEquals(test.getFinishTime(), 123L);
    assertEquals(test.getHostname(), "HOSTNAME");
    assertEquals(test.getRackName(), "RAKNAME");
    assertEquals(test.getState(), "STATUS");
    assertEquals(test.getTaskId(), tid);
    assertEquals(test.getTaskStatus(), "TEST");
    assertEquals(test.getTaskType(), TaskType.REDUCE);
}
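For reference, the ids constructed in the test above take the following string forms (hedged: based on the standard Hadoop formatting of 4-digit job numbers, 6-digit task numbers, and unpadded attempt numbers):

// Sketch only: string representations of the id hierarchy built in the test.
JobID jid = new JobID("001", 1);                          // job_001_0001
TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);         // task_001_0001_r_000002
TaskAttemptID taskAttemptId = new TaskAttemptID(tid, 3);  // attempt_001_0001_r_000002_3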