Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopJobTracker, method processJobMetaUpdate:
/**
* @param jobId Job ID.
* @param meta Job metadata.
* @param locNodeId Local node ID.
* @throws IgniteCheckedException If failed.
*/
private void processJobMetaUpdate(HadoopJobId jobId, HadoopJobMetadata meta, UUID locNodeId)
    throws IgniteCheckedException {
    JobLocalState state = activeJobs.get(jobId);

    HadoopJobEx job = job(jobId, meta.jobInfo());

    HadoopMapReducePlan plan = meta.mapReducePlan();

    switch (meta.phase()) {
        case PHASE_SETUP: {
            if (ctx.jobUpdateLeader()) {
                Collection<HadoopTaskInfo> setupTask = setupTask(jobId);

                if (setupTask != null)
                    ctx.taskExecutor().run(job, setupTask);
            }

            break;
        }

        case PHASE_MAP: {
            // Check if we should initiate new task on local node.
            Collection<HadoopTaskInfo> tasks = mapperTasks(plan.mappers(locNodeId), meta);

            if (tasks != null)
                ctx.taskExecutor().run(job, tasks);

            break;
        }

        case PHASE_REDUCE: {
            if (meta.pendingReducers().isEmpty() && ctx.jobUpdateLeader()) {
                HadoopTaskInfo info = new HadoopTaskInfo(COMMIT, jobId, 0, 0, null);

                if (log.isDebugEnabled())
                    log.debug("Submitting COMMIT task for execution [locNodeId=" + locNodeId +
                        ", jobId=" + jobId + ']');

                ctx.taskExecutor().run(job, Collections.singletonList(info));

                break;
            }

            Collection<HadoopTaskInfo> tasks = reducerTasks(plan.reducers(locNodeId), job);

            if (tasks != null)
                ctx.taskExecutor().run(job, tasks);

            break;
        }

        case PHASE_CANCELLING: {
            // Prevent multiple task executor notification.
            if (state != null && state.onCancel()) {
                if (log.isDebugEnabled())
                    log.debug("Cancelling local task execution for job: " + meta);

                ctx.taskExecutor().cancelTasks(jobId);
            }

            if (meta.pendingSplits().isEmpty() && meta.pendingReducers().isEmpty()) {
                if (ctx.jobUpdateLeader()) {
                    if (state == null)
                        state = initState(jobId);

                    // Prevent running multiple abort tasks.
                    if (state.onAborted()) {
                        HadoopTaskInfo info = new HadoopTaskInfo(ABORT, jobId, 0, 0, null);

                        if (log.isDebugEnabled())
                            log.debug("Submitting ABORT task for execution [locNodeId=" + locNodeId +
                                ", jobId=" + jobId + ']');

                        ctx.taskExecutor().run(job, Collections.singletonList(info));
                    }
                }

                break;
            }
            else {
                // Check if there are unscheduled mappers or reducers.
                Collection<HadoopInputSplit> cancelMappers = new ArrayList<>();
                Collection<Integer> cancelReducers = new ArrayList<>();

                Collection<HadoopInputSplit> mappers = plan.mappers(ctx.localNodeId());

                if (mappers != null) {
                    for (HadoopInputSplit b : mappers) {
                        if (state == null || !state.mapperScheduled(b))
                            cancelMappers.add(b);
                    }
                }

                int[] rdc = plan.reducers(ctx.localNodeId());

                if (rdc != null) {
                    for (int r : rdc) {
                        if (state == null || !state.reducerScheduled(r))
                            cancelReducers.add(r);
                    }
                }

                if (!cancelMappers.isEmpty() || !cancelReducers.isEmpty())
                    transform(jobId, new CancelJobProcessor(null, cancelMappers, cancelReducers));
            }

            break;
        }

        case PHASE_COMPLETE: {
            if (log.isDebugEnabled())
                log.debug("Job execution is complete, will remove local state from active jobs " +
                    "[jobId=" + jobId + ", meta=" + meta + ']');

            if (state != null) {
                state = activeJobs.remove(jobId);

                assert state != null;

                ctx.shuffle().jobFinished(jobId);
            }

            GridFutureAdapter<HadoopJobId> finishFut = activeFinishFuts.remove(jobId);

            if (finishFut != null) {
                if (log.isDebugEnabled())
                    log.debug("Completing job future [locNodeId=" + locNodeId + ", meta=" + meta + ']');

                finishFut.onDone(jobId, meta.failCause());
            }

            assert job != null;

            if (ctx.jobUpdateLeader())
                job.cleanupStagingDirectory();

            jobs.remove(jobId);

            if (ctx.jobUpdateLeader()) {
                ClassLoader ldr = job.getClass().getClassLoader();

                try {
                    String statWriterClsName = job.info().property(HadoopCommonUtils.JOB_COUNTER_WRITER_PROPERTY);

                    if (statWriterClsName != null) {
                        Class<?> cls = ldr.loadClass(statWriterClsName);

                        HadoopCounterWriter writer = (HadoopCounterWriter)cls.newInstance();

                        HadoopCounters cntrs = meta.counters();

                        writer.write(job, cntrs);
                    }
                }
                catch (Exception e) {
                    log.error("Can't write statistic due to: ", e);
                }
            }

            job.dispose(false);

            break;
        }

        default:
            throw new IllegalStateException("Unknown phase: " + meta.phase());
    }
}
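The method above is one side of a replicated state machine: every node observes the same HadoopJobMetadata update and reacts only to the phase it encodes, while the elected update leader alone submits the one-off SETUP, COMMIT, and ABORT tasks. A minimal sketch of that dispatch shape, stripped of Ignite's internals (all names here are illustrative, not part of Ignite's API):

import java.util.UUID;

/** Illustrative phase dispatch modeled on processJobMetaUpdate above; not Ignite API. */
public class PhaseDispatchSketch {
    /** Mirrors the PHASE_* constants handled by the switch. */
    enum Phase { SETUP, MAP, REDUCE, CANCELLING, COMPLETE }

    /** Every node runs this on each metadata update; only the leader submits one-off tasks. */
    static void onMetaUpdate(UUID jobId, Phase phase, boolean leader) {
        switch (phase) {
            case SETUP:
                if (leader)
                    System.out.println("Leader submits SETUP task for " + jobId);

                break;

            case MAP:
                System.out.println("Run mapper tasks planned for this node");

                break;

            case REDUCE:
                System.out.println("Run local reducers; leader submits COMMIT once none are pending");

                break;

            case CANCELLING:
                System.out.println("Cancel local tasks; leader submits ABORT once nothing is pending");

                break;

            case COMPLETE:
                System.out.println("Drop local state, complete the finish future, dispose the job");

                break;

            default:
                throw new IllegalStateException("Unknown phase: " + phase);
        }
    }

    public static void main(String[] args) {
        UUID jobId = UUID.randomUUID();

        // Walk the normal lifecycle on a leader node.
        for (Phase p : new Phase[] {Phase.SETUP, Phase.MAP, Phase.REDUCE, Phase.COMPLETE})
            onMetaUpdate(jobId, p, true);
    }
}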
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopJobTracker, method submit:
/**
* Submits execution of Hadoop job to grid.
*
* @param jobId Job ID.
* @param info Job info.
* @return Job completion future.
*/
@SuppressWarnings("unchecked")
public IgniteInternalFuture<HadoopJobId> submit(HadoopJobId jobId, HadoopJobInfo info) {
    if (!busyLock.tryReadLock()) {
        return new GridFinishedFuture<>(new IgniteCheckedException("Failed to execute map-reduce job " +
            "(grid is stopping): " + info));
    }

    try {
        long jobPrepare = U.currentTimeMillis();

        if (jobs.containsKey(jobId) || jobMetaCache().containsKey(jobId))
            throw new IgniteCheckedException("Failed to submit job. Job with the same ID already exists: " + jobId);

        HadoopJobEx job = job(jobId, info);

        HadoopMapReducePlan mrPlan = mrPlanner.preparePlan(job, ctx.nodes(), null);

        logPlan(info, mrPlan);

        HadoopJobMetadata meta = new HadoopJobMetadata(ctx.localNodeId(), jobId, info);

        meta.mapReducePlan(mrPlan);

        meta.pendingSplits(allSplits(mrPlan));
        meta.pendingReducers(allReducers(mrPlan));

        GridFutureAdapter<HadoopJobId> completeFut = new GridFutureAdapter<>();

        GridFutureAdapter<HadoopJobId> old = activeFinishFuts.put(jobId, completeFut);

        assert old == null : "Duplicate completion future [jobId=" + jobId + ", old=" + old + ']';

        if (log.isDebugEnabled())
            log.debug("Submitting job metadata [jobId=" + jobId + ", meta=" + meta + ']');

        long jobStart = U.currentTimeMillis();

        HadoopPerformanceCounter perfCntr = HadoopPerformanceCounter.getCounter(meta.counters(), ctx.localNodeId());

        perfCntr.clientSubmissionEvents(info);
        perfCntr.onJobPrepare(jobPrepare);
        perfCntr.onJobStart(jobStart);

        if (jobMetaCache().getAndPutIfAbsent(jobId, meta) != null)
            throw new IgniteCheckedException("Failed to submit job. Job with the same ID already exists: " + jobId);

        return completeFut;
    }
    catch (IgniteCheckedException e) {
        U.error(log, "Failed to submit job: " + jobId, e);

        return new GridFinishedFuture<>(e);
    }
    finally {
        busyLock.readUnlock();
    }
}
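A hypothetical caller (not part of the Ignite sources) could drive submit as in the sketch below; the tracker and info values are assumed to come from a running Hadoop processor, and the returned future is the same one that processJobMetaUpdate completes under PHASE_COMPLETE:

import java.util.UUID;

import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.processors.hadoop.HadoopJobId;
import org.apache.ignite.internal.processors.hadoop.HadoopJobInfo;
import org.apache.ignite.internal.processors.hadoop.jobtracker.HadoopJobTracker;

/** Hypothetical caller sketch; tracker and info must come from a running Hadoop processor. */
class SubmitSketch {
    static HadoopJobId submitAndWait(HadoopJobTracker tracker, HadoopJobInfo info) throws Exception {
        HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

        // submit() stores the metadata in the replicated job-meta cache; the returned
        // future completes when processJobMetaUpdate() observes PHASE_COMPLETE.
        IgniteInternalFuture<HadoopJobId> fut = tracker.submit(jobId, info);

        // get() rethrows meta.failCause() as IgniteCheckedException if the job failed.
        return fut.get();
    }
}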
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopV2JobSelfTest, method testCustomSerializationApplying:
/**
 * Tests that {@link HadoopJobEx} provides a wrapped serializer if one is set in the configuration.
 *
 * @throws IgniteCheckedException If failed.
 */
public void testCustomSerializationApplying() throws IgniteCheckedException {
    JobConf cfg = new JobConf();

    cfg.setMapOutputKeyClass(IntWritable.class);
    cfg.setMapOutputValueClass(Text.class);

    cfg.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, CustomSerialization.class.getName());

    HadoopDefaultJobInfo info = createJobInfo(cfg);

    final UUID uuid = UUID.randomUUID();

    HadoopJobId id = new HadoopJobId(uuid, 1);

    HadoopJobEx job = info.createJob(HadoopV2Job.class, id, log, null, new HadoopHelperImpl());

    HadoopTaskContext taskCtx = job.getTaskContext(new HadoopTaskInfo(HadoopTaskType.MAP, null, 0, 0, null));

    HadoopSerialization ser = taskCtx.keySerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());

    DataInput in = new DataInputStream(new ByteArrayInputStream(new byte[0]));

    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());

    ser = taskCtx.valueSerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());

    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());
}
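CustomSerialization and TEST_SERIALIZED_VALUE are defined elsewhere in the test class and are not shown here. A plausible shape for such a stub, assuming it implements Hadoop's Serialization SPI and always yields the sentinel value regardless of the input stream (a hypothetical reconstruction, not the actual test code):

import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.Serialization;
import org.apache.hadoop.io.serializer.Serializer;

/** Hypothetical reconstruction of the test's serialization stub. */
public class CustomSerialization implements Serialization<Writable> {
    /** Placeholder value; the real constant is defined in the test class. */
    static final String TEST_SERIALIZED_VALUE = "test_serialized_value";

    /** Accepts both IntWritable keys and Text values, so key and value paths use this stub. */
    @Override public boolean accept(Class<?> c) {
        return Writable.class.isAssignableFrom(c);
    }

    @Override public Serializer<Writable> getSerializer(Class<Writable> c) {
        return new Serializer<Writable>() {
            @Override public void open(OutputStream out) { /* No-op for the stub. */ }
            @Override public void serialize(Writable w) { /* No-op for the stub. */ }
            @Override public void close() { /* No-op. */ }
        };
    }

    @Override public Deserializer<Writable> getDeserializer(Class<Writable> c) {
        return new Deserializer<Writable>() {
            @Override public void open(InputStream in) { /* The stream is ignored. */ }
            @Override public Writable deserialize(Writable w) { return new Text(TEST_SERIALIZED_VALUE); }
            @Override public void close() { /* No-op. */ }
        };
    }
}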
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopTasksAllVersionsTest, method testReduceTask:
/**
 * Tests reduce task execution.
 *
 * @throws Exception If failed.
 */
public void testReduceTask() throws Exception {
    HadoopJobEx gridJob = getHadoopJob(igfsScheme() + PATH_INPUT, igfsScheme() + PATH_OUTPUT);

    runTaskWithInput(gridJob, HadoopTaskType.REDUCE, 0, "word1", "5", "word2", "10");
    runTaskWithInput(gridJob, HadoopTaskType.REDUCE, 1, "word3", "7", "word4", "15");

    assertEquals("word1\t5\n" + "word2\t10\n",
        readAndSortFile(PATH_OUTPUT + "/_temporary/0/task_00000000-0000-0000-0000-000000000000_0000_r_000000/" +
            getOutputFileNamePrefix() + "00000"));

    assertEquals("word3\t7\n" + "word4\t15\n",
        readAndSortFile(PATH_OUTPUT + "/_temporary/0/task_00000000-0000-0000-0000-000000000000_0000_r_000001/" +
            getOutputFileNamePrefix() + "00001"));
}
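The asserted paths follow Hadoop's FileOutputCommitter layout, where each reduce attempt writes under _temporary/<attempt>/task_<jobId>_r_<partition> until commit. The readAndSortFile helper is defined elsewhere in the test hierarchy; a hypothetical equivalent that reads lines from a stream and returns them sorted might look like this:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/** Hypothetical stand-in for the test helper: returns the stream's lines, sorted and newline-terminated. */
final class ReadAndSortSketch {
    static String readAndSort(InputStream in) throws IOException {
        List<String> lines = new ArrayList<>();

        try (BufferedReader rdr = new BufferedReader(new InputStreamReader(in))) {
            for (String line = rdr.readLine(); line != null; line = rdr.readLine())
                lines.add(line);
        }

        Collections.sort(lines);

        StringBuilder sb = new StringBuilder();

        for (String line : lines)
            sb.append(line).append('\n');

        return sb.toString();
    }
}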
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopTasksAllVersionsTest, method testCombinerTask:
/**
 * Tests combine task execution.
 *
 * @throws Exception If failed.
 */
public void testCombinerTask() throws Exception {
    HadoopJobEx gridJob = getHadoopJob("/", "/");

    HadoopTestTaskContext ctx = runTaskWithInput(gridJob, HadoopTaskType.COMBINE, 0, "word1", "5", "word2", "10");

    assertEquals("word1,5; word2,10", Joiner.on("; ").join(ctx.mockOutput()));

    ctx = runTaskWithInput(gridJob, HadoopTaskType.COMBINE, 1, "word3", "7", "word4", "15");

    assertEquals("word3,7; word4,15", Joiner.on("; ").join(ctx.mockOutput()));
}