Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopJobTracker, method processJobMetaUpdate:
/**
* @param jobId Job ID.
* @param meta Job metadata.
* @param locNodeId Local node ID.
* @throws IgniteCheckedException If failed.
*/
private void processJobMetaUpdate(HadoopJobId jobId, HadoopJobMetadata meta, UUID locNodeId)
    throws IgniteCheckedException {
    JobLocalState state = activeJobs.get(jobId);

    HadoopJobEx job = job(jobId, meta.jobInfo());

    HadoopMapReducePlan plan = meta.mapReducePlan();

    switch (meta.phase()) {
        case PHASE_SETUP: {
            if (ctx.jobUpdateLeader()) {
                Collection<HadoopTaskInfo> setupTask = setupTask(jobId);

                if (setupTask != null)
                    ctx.taskExecutor().run(job, setupTask);
            }

            break;
        }

        case PHASE_MAP: {
            // Check if we should initiate new task on local node.
            Collection<HadoopTaskInfo> tasks = mapperTasks(plan.mappers(locNodeId), meta);

            if (tasks != null)
                ctx.taskExecutor().run(job, tasks);

            break;
        }

        case PHASE_REDUCE: {
            if (meta.pendingReducers().isEmpty() && ctx.jobUpdateLeader()) {
                HadoopTaskInfo info = new HadoopTaskInfo(COMMIT, jobId, 0, 0, null);

                if (log.isDebugEnabled())
                    log.debug("Submitting COMMIT task for execution [locNodeId=" + locNodeId +
                        ", jobId=" + jobId + ']');

                ctx.taskExecutor().run(job, Collections.singletonList(info));

                break;
            }

            Collection<HadoopTaskInfo> tasks = reducerTasks(plan.reducers(locNodeId), job);

            if (tasks != null)
                ctx.taskExecutor().run(job, tasks);

            break;
        }

        case PHASE_CANCELLING: {
            // Prevent multiple task executor notification.
            if (state != null && state.onCancel()) {
                if (log.isDebugEnabled())
                    log.debug("Cancelling local task execution for job: " + meta);

                ctx.taskExecutor().cancelTasks(jobId);
            }

            if (meta.pendingSplits().isEmpty() && meta.pendingReducers().isEmpty()) {
                if (ctx.jobUpdateLeader()) {
                    if (state == null)
                        state = initState(jobId);

                    // Prevent running multiple abort tasks.
                    if (state.onAborted()) {
                        HadoopTaskInfo info = new HadoopTaskInfo(ABORT, jobId, 0, 0, null);

                        if (log.isDebugEnabled())
                            log.debug("Submitting ABORT task for execution [locNodeId=" + locNodeId +
                                ", jobId=" + jobId + ']');

                        ctx.taskExecutor().run(job, Collections.singletonList(info));
                    }
                }

                break;
            }
            else {
                // Check if there are unscheduled mappers or reducers.
                Collection<HadoopInputSplit> cancelMappers = new ArrayList<>();
                Collection<Integer> cancelReducers = new ArrayList<>();

                Collection<HadoopInputSplit> mappers = plan.mappers(ctx.localNodeId());

                if (mappers != null) {
                    for (HadoopInputSplit b : mappers) {
                        if (state == null || !state.mapperScheduled(b))
                            cancelMappers.add(b);
                    }
                }

                int[] rdc = plan.reducers(ctx.localNodeId());

                if (rdc != null) {
                    for (int r : rdc) {
                        if (state == null || !state.reducerScheduled(r))
                            cancelReducers.add(r);
                    }
                }

                if (!cancelMappers.isEmpty() || !cancelReducers.isEmpty())
                    transform(jobId, new CancelJobProcessor(null, cancelMappers, cancelReducers));
            }

            break;
        }

        case PHASE_COMPLETE: {
            if (log.isDebugEnabled())
                log.debug("Job execution is complete, will remove local state from active jobs " +
                    "[jobId=" + jobId + ", meta=" + meta + ']');

            if (state != null) {
                state = activeJobs.remove(jobId);

                assert state != null;

                ctx.shuffle().jobFinished(jobId);
            }

            GridFutureAdapter<HadoopJobId> finishFut = activeFinishFuts.remove(jobId);

            if (finishFut != null) {
                if (log.isDebugEnabled())
                    log.debug("Completing job future [locNodeId=" + locNodeId + ", meta=" + meta + ']');

                finishFut.onDone(jobId, meta.failCause());
            }

            assert job != null;

            if (ctx.jobUpdateLeader())
                job.cleanupStagingDirectory();

            jobs.remove(jobId);

            if (ctx.jobUpdateLeader()) {
                ClassLoader ldr = job.getClass().getClassLoader();

                try {
                    String statWriterClsName = job.info().property(HadoopCommonUtils.JOB_COUNTER_WRITER_PROPERTY);

                    if (statWriterClsName != null) {
                        Class<?> cls = ldr.loadClass(statWriterClsName);

                        HadoopCounterWriter writer = (HadoopCounterWriter)cls.newInstance();

                        HadoopCounters cntrs = meta.counters();

                        writer.write(job, cntrs);
                    }
                }
                catch (Exception e) {
                    log.error("Can't write statistic due to: ", e);
                }
            }

            job.dispose(false);

            break;
        }

        default:
            throw new IllegalStateException("Unknown phase: " + meta.phase());
    }
}
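The method above is one side of a replicated state machine: every node observes the same HadoopJobMetadata update and reacts only to the phase it encodes, while the elected update leader alone submits the one-off SETUP, COMMIT, and ABORT tasks. A minimal sketch of that dispatch shape, stripped of Ignite's internals (all names here are illustrative, not part of Ignite's API):

import java.util.UUID;

/** Illustrative phase dispatch modeled on processJobMetaUpdate above; not Ignite API. */
public class PhaseDispatchSketch {
    /** Mirrors the PHASE_* constants handled by the switch. */
    enum Phase { SETUP, MAP, REDUCE, CANCELLING, COMPLETE }

    /** Every node runs this on each metadata update; only the leader submits one-off tasks. */
    static void onMetaUpdate(UUID jobId, Phase phase, boolean leader) {
        switch (phase) {
            case SETUP:
                if (leader)
                    System.out.println("Leader submits SETUP task for " + jobId);

                break;

            case MAP:
                System.out.println("Run mapper tasks planned for this node");

                break;

            case REDUCE:
                System.out.println("Run local reducers; leader submits COMMIT once none are pending");

                break;

            case CANCELLING:
                System.out.println("Cancel local tasks; leader submits ABORT once nothing is pending");

                break;

            case COMPLETE:
                System.out.println("Drop local state, complete the finish future, dispose the job");

                break;

            default:
                throw new IllegalStateException("Unknown phase: " + phase);
        }
    }

    public static void main(String[] args) {
        UUID jobId = UUID.randomUUID();

        // Walk the normal lifecycle on a leader node.
        for (Phase p : new Phase[] {Phase.SETUP, Phase.MAP, Phase.REDUCE, Phase.COMPLETE})
            onMetaUpdate(jobId, p, true);
    }
}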
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopJobTracker, method submit:
/**
* Submits execution of Hadoop job to grid.
*
* @param jobId Job ID.
* @param info Job info.
* @return Job completion future.
*/
@SuppressWarnings("unchecked")
public IgniteInternalFuture<HadoopJobId> submit(HadoopJobId jobId, HadoopJobInfo info) {
    if (!busyLock.tryReadLock()) {
        return new GridFinishedFuture<>(new IgniteCheckedException("Failed to execute map-reduce job " +
            "(grid is stopping): " + info));
    }

    try {
        long jobPrepare = U.currentTimeMillis();

        if (jobs.containsKey(jobId) || jobMetaCache().containsKey(jobId))
            throw new IgniteCheckedException("Failed to submit job. Job with the same ID already exists: " + jobId);

        HadoopJobEx job = job(jobId, info);

        HadoopMapReducePlan mrPlan = mrPlanner.preparePlan(job, ctx.nodes(), null);

        logPlan(info, mrPlan);

        HadoopJobMetadata meta = new HadoopJobMetadata(ctx.localNodeId(), jobId, info);

        meta.mapReducePlan(mrPlan);

        meta.pendingSplits(allSplits(mrPlan));
        meta.pendingReducers(allReducers(mrPlan));

        GridFutureAdapter<HadoopJobId> completeFut = new GridFutureAdapter<>();

        GridFutureAdapter<HadoopJobId> old = activeFinishFuts.put(jobId, completeFut);

        assert old == null : "Duplicate completion future [jobId=" + jobId + ", old=" + old + ']';

        if (log.isDebugEnabled())
            log.debug("Submitting job metadata [jobId=" + jobId + ", meta=" + meta + ']');

        long jobStart = U.currentTimeMillis();

        HadoopPerformanceCounter perfCntr = HadoopPerformanceCounter.getCounter(meta.counters(), ctx.localNodeId());

        perfCntr.clientSubmissionEvents(info);
        perfCntr.onJobPrepare(jobPrepare);
        perfCntr.onJobStart(jobStart);

        if (jobMetaCache().getAndPutIfAbsent(jobId, meta) != null)
            throw new IgniteCheckedException("Failed to submit job. Job with the same ID already exists: " + jobId);

        return completeFut;
    }
    catch (IgniteCheckedException e) {
        U.error(log, "Failed to submit job: " + jobId, e);

        return new GridFinishedFuture<>(e);
    }
    finally {
        busyLock.readUnlock();
    }
}
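A hypothetical caller (not part of the Ignite sources) could drive submit as in the sketch below; the tracker and info values are assumed to come from a running Hadoop processor, and the returned future is the same one that processJobMetaUpdate completes under PHASE_COMPLETE:

import java.util.UUID;

import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.processors.hadoop.HadoopJobId;
import org.apache.ignite.internal.processors.hadoop.HadoopJobInfo;
import org.apache.ignite.internal.processors.hadoop.jobtracker.HadoopJobTracker;

/** Hypothetical caller sketch; tracker and info must come from a running Hadoop processor. */
class SubmitSketch {
    static HadoopJobId submitAndWait(HadoopJobTracker tracker, HadoopJobInfo info) throws Exception {
        HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

        // submit() stores the metadata in the replicated job-meta cache; the returned
        // future completes when processJobMetaUpdate() observes PHASE_COMPLETE.
        IgniteInternalFuture<HadoopJobId> fut = tracker.submit(jobId, info);

        // get() rethrows meta.failCause() as IgniteCheckedException if the job failed.
        return fut.get();
    }
}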
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopV2JobSelfTest, method testCustomSerializationApplying:
/**
 * Tests that {@link HadoopJobEx} provides a wrapped serializer if one is set in the configuration.
 *
 * @throws IgniteCheckedException If failed.
 */
public void testCustomSerializationApplying() throws IgniteCheckedException {
    JobConf cfg = new JobConf();

    cfg.setMapOutputKeyClass(IntWritable.class);
    cfg.setMapOutputValueClass(Text.class);

    cfg.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, CustomSerialization.class.getName());

    HadoopDefaultJobInfo info = createJobInfo(cfg);

    final UUID uuid = UUID.randomUUID();

    HadoopJobId id = new HadoopJobId(uuid, 1);

    HadoopJobEx job = info.createJob(HadoopV2Job.class, id, log, null, new HadoopHelperImpl());

    HadoopTaskContext taskCtx = job.getTaskContext(new HadoopTaskInfo(HadoopTaskType.MAP, null, 0, 0, null));

    HadoopSerialization ser = taskCtx.keySerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());

    DataInput in = new DataInputStream(new ByteArrayInputStream(new byte[0]));

    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());

    ser = taskCtx.valueSerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());

    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());
}
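CustomSerialization and TEST_SERIALIZED_VALUE are defined elsewhere in the test class and are not shown here. A plausible shape for such a stub, assuming it implements Hadoop's Serialization SPI and always yields the sentinel value regardless of the input stream (a hypothetical reconstruction, not the actual test code):

import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.Serialization;
import org.apache.hadoop.io.serializer.Serializer;

/** Hypothetical reconstruction of the test's serialization stub. */
public class CustomSerialization implements Serialization<Writable> {
    /** Placeholder value; the real constant is defined in the test class. */
    static final String TEST_SERIALIZED_VALUE = "test_serialized_value";

    /** Accepts both IntWritable keys and Text values, so key and value paths use this stub. */
    @Override public boolean accept(Class<?> c) {
        return Writable.class.isAssignableFrom(c);
    }

    @Override public Serializer<Writable> getSerializer(Class<Writable> c) {
        return new Serializer<Writable>() {
            @Override public void open(OutputStream out) { /* No-op for the stub. */ }
            @Override public void serialize(Writable w) { /* No-op for the stub. */ }
            @Override public void close() { /* No-op. */ }
        };
    }

    @Override public Deserializer<Writable> getDeserializer(Class<Writable> c) {
        return new Deserializer<Writable>() {
            @Override public void open(InputStream in) { /* The stream is ignored. */ }
            @Override public Writable deserialize(Writable w) { return new Text(TEST_SERIALIZED_VALUE); }
            @Override public void close() { /* No-op. */ }
        };
    }
}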
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopTasksAllVersionsTest, method testReduceTask:
/**
 * Tests reduce task execution.
 *
 * @throws Exception If failed.
 */
public void testReduceTask() throws Exception {
    HadoopJobEx gridJob = getHadoopJob(igfsScheme() + PATH_INPUT, igfsScheme() + PATH_OUTPUT);

    runTaskWithInput(gridJob, HadoopTaskType.REDUCE, 0, "word1", "5", "word2", "10");
    runTaskWithInput(gridJob, HadoopTaskType.REDUCE, 1, "word3", "7", "word4", "15");

    assertEquals("word1\t5\n" + "word2\t10\n",
        readAndSortFile(PATH_OUTPUT + "/_temporary/0/task_00000000-0000-0000-0000-000000000000_0000_r_000000/" +
            getOutputFileNamePrefix() + "00000"));

    assertEquals("word3\t7\n" + "word4\t15\n",
        readAndSortFile(PATH_OUTPUT + "/_temporary/0/task_00000000-0000-0000-0000-000000000000_0000_r_000001/" +
            getOutputFileNamePrefix() + "00001"));
}
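The asserted paths follow Hadoop's FileOutputCommitter layout, where each reduce attempt writes under _temporary/<attempt>/task_<jobId>_r_<partition> until commit. The readAndSortFile helper is defined elsewhere in the test hierarchy; a hypothetical equivalent that reads lines from a stream and returns them sorted might look like this:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/** Hypothetical stand-in for the test helper: returns the stream's lines, sorted and newline-terminated. */
final class ReadAndSortSketch {
    static String readAndSort(InputStream in) throws IOException {
        List<String> lines = new ArrayList<>();

        try (BufferedReader rdr = new BufferedReader(new InputStreamReader(in))) {
            for (String line = rdr.readLine(); line != null; line = rdr.readLine())
                lines.add(line);
        }

        Collections.sort(lines);

        StringBuilder sb = new StringBuilder();

        for (String line : lines)
            sb.append(line).append('\n');

        return sb.toString();
    }
}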
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopTasksAllVersionsTest, method testCombinerTask:
/**
 * Tests combine task execution.
 *
 * @throws Exception If failed.
 */
public void testCombinerTask() throws Exception {
    HadoopJobEx gridJob = getHadoopJob("/", "/");

    HadoopTestTaskContext ctx = runTaskWithInput(gridJob, HadoopTaskType.COMBINE, 0, "word1", "5", "word2", "10");

    assertEquals("word1,5; word2,10", Joiner.on("; ").join(ctx.mockOutput()));

    ctx = runTaskWithInput(gridJob, HadoopTaskType.COMBINE, 1, "word3", "7", "word4", "15");

    assertEquals("word3,7; word4,15", Joiner.on("; ").join(ctx.mockOutput()));
}