Search in sources:

Example 1 with HadoopTaskInfo

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache: class HadoopExecutorService, method startThread().

/**
 * @param task Task.
 */
private void startThread(final Callable<?> task) {
    String workerName;
    if (task instanceof HadoopRunnableTask) {
        final HadoopTaskInfo i = ((HadoopRunnableTask) task).taskInfo();
        workerName = "Hadoop-task-" + i.jobId() + "-" + i.type() + "-" + i.taskNumber() + "-" + i.attempt();
    } else
        workerName = task.toString();
    GridWorker w = new GridWorker(igniteInstanceName, workerName, log, lsnr) {

        @Override
        protected void body() {
            try {
                task.call();
            } catch (Exception e) {
                log.error("Failed to execute task: " + task, e);
            }
        }
    };
    workers.add(w);
    if (shutdown)
        w.cancel();
    new IgniteThread(w).start();
}
Also used: HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo), IgniteThread (org.apache.ignite.thread.IgniteThread), GridWorker (org.apache.ignite.internal.util.worker.GridWorker)
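
Note that a Callable which is not a HadoopRunnableTask falls back to task.toString() for the worker name. The following sketch is not from the Ignite source; it only illustrates that overriding toString() on such a task keeps thread dumps readable (the task name "Hadoop-aux-task" is a made-up example):

// Sketch only (not from the Ignite source): a Callable without a HadoopTaskInfo gets
// task.toString() as its GridWorker name, so a descriptive toString() helps in thread dumps.
Callable<Void> auxTask = new Callable<Void>() {
    @Override public Void call() throws Exception {
        // ... auxiliary work submitted to the Hadoop executor service ...
        return null;
    }

    @Override public String toString() {
        return "Hadoop-aux-task";
    }
};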

Example 2 with HadoopTaskInfo

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache: class HadoopJobTracker, method processJobMetaUpdate().

/**
 * @param jobId Job ID.
 * @param meta Job metadata.
 * @param locNodeId Local node ID.
 * @throws IgniteCheckedException If failed.
 */
private void processJobMetaUpdate(HadoopJobId jobId, HadoopJobMetadata meta, UUID locNodeId) throws IgniteCheckedException {
    JobLocalState state = activeJobs.get(jobId);
    HadoopJobEx job = job(jobId, meta.jobInfo());
    HadoopMapReducePlan plan = meta.mapReducePlan();
    switch(meta.phase()) {
        case PHASE_SETUP:
            {
                if (ctx.jobUpdateLeader()) {
                    Collection<HadoopTaskInfo> setupTask = setupTask(jobId);
                    if (setupTask != null)
                        ctx.taskExecutor().run(job, setupTask);
                }
                break;
            }
        case PHASE_MAP:
            {
                // Check if we should initiate new task on local node.
                Collection<HadoopTaskInfo> tasks = mapperTasks(plan.mappers(locNodeId), meta);
                if (tasks != null)
                    ctx.taskExecutor().run(job, tasks);
                break;
            }
        case PHASE_REDUCE:
            {
                if (meta.pendingReducers().isEmpty() && ctx.jobUpdateLeader()) {
                    HadoopTaskInfo info = new HadoopTaskInfo(COMMIT, jobId, 0, 0, null);
                    if (log.isDebugEnabled())
                        log.debug("Submitting COMMIT task for execution [locNodeId=" + locNodeId + ", jobId=" + jobId + ']');
                    ctx.taskExecutor().run(job, Collections.singletonList(info));
                    break;
                }
                Collection<HadoopTaskInfo> tasks = reducerTasks(plan.reducers(locNodeId), job);
                if (tasks != null)
                    ctx.taskExecutor().run(job, tasks);
                break;
            }
        case PHASE_CANCELLING:
            {
                // Prevent multiple task executor notification.
                if (state != null && state.onCancel()) {
                    if (log.isDebugEnabled())
                        log.debug("Cancelling local task execution for job: " + meta);
                    ctx.taskExecutor().cancelTasks(jobId);
                }
                if (meta.pendingSplits().isEmpty() && meta.pendingReducers().isEmpty()) {
                    if (ctx.jobUpdateLeader()) {
                        if (state == null)
                            state = initState(jobId);
                        // Prevent running multiple abort tasks.
                        if (state.onAborted()) {
                            HadoopTaskInfo info = new HadoopTaskInfo(ABORT, jobId, 0, 0, null);
                            if (log.isDebugEnabled())
                                log.debug("Submitting ABORT task for execution [locNodeId=" + locNodeId + ", jobId=" + jobId + ']');
                            ctx.taskExecutor().run(job, Collections.singletonList(info));
                        }
                    }
                    break;
                } else {
                    // Check if there are unscheduled mappers or reducers.
                    Collection<HadoopInputSplit> cancelMappers = new ArrayList<>();
                    Collection<Integer> cancelReducers = new ArrayList<>();
                    Collection<HadoopInputSplit> mappers = plan.mappers(ctx.localNodeId());
                    if (mappers != null) {
                        for (HadoopInputSplit b : mappers) {
                            if (state == null || !state.mapperScheduled(b))
                                cancelMappers.add(b);
                        }
                    }
                    int[] rdc = plan.reducers(ctx.localNodeId());
                    if (rdc != null) {
                        for (int r : rdc) {
                            if (state == null || !state.reducerScheduled(r))
                                cancelReducers.add(r);
                        }
                    }
                    if (!cancelMappers.isEmpty() || !cancelReducers.isEmpty())
                        transform(jobId, new CancelJobProcessor(null, cancelMappers, cancelReducers));
                }
                break;
            }
        case PHASE_COMPLETE:
            {
                if (log.isDebugEnabled())
                    log.debug("Job execution is complete, will remove local state from active jobs " + "[jobId=" + jobId + ", meta=" + meta + ']');
                if (state != null) {
                    state = activeJobs.remove(jobId);
                    assert state != null;
                    ctx.shuffle().jobFinished(jobId);
                }
                GridFutureAdapter<HadoopJobId> finishFut = activeFinishFuts.remove(jobId);
                if (finishFut != null) {
                    if (log.isDebugEnabled())
                        log.debug("Completing job future [locNodeId=" + locNodeId + ", meta=" + meta + ']');
                    finishFut.onDone(jobId, meta.failCause());
                }
                assert job != null;
                if (ctx.jobUpdateLeader())
                    job.cleanupStagingDirectory();
                jobs.remove(jobId);
                if (ctx.jobUpdateLeader()) {
                    ClassLoader ldr = job.getClass().getClassLoader();
                    try {
                        String statWriterClsName = job.info().property(HadoopCommonUtils.JOB_COUNTER_WRITER_PROPERTY);
                        if (statWriterClsName != null) {
                            Class<?> cls = ldr.loadClass(statWriterClsName);
                            HadoopCounterWriter writer = (HadoopCounterWriter) cls.newInstance();
                            HadoopCounters cntrs = meta.counters();
                            writer.write(job, cntrs);
                        }
                    } catch (Exception e) {
                        log.error("Can't write statistic due to: ", e);
                    }
                }
                job.dispose(false);
                break;
            }
        default:
            throw new IllegalStateException("Unknown phase: " + meta.phase());
    }
}
Also used: HadoopCounterWriter (org.apache.ignite.internal.processors.hadoop.counter.HadoopCounterWriter), HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit), IgniteCheckedException (org.apache.ignite.IgniteCheckedException), HadoopTaskCancelledException (org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException), HadoopMapReducePlan (org.apache.ignite.hadoop.HadoopMapReducePlan), HadoopJobEx (org.apache.ignite.internal.processors.hadoop.HadoopJobEx), HadoopCounters (org.apache.ignite.internal.processors.hadoop.counter.HadoopCounters), HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo), GridFutureAdapter (org.apache.ignite.internal.util.future.GridFutureAdapter), Collection (java.util.Collection), HadoopClassLoader (org.apache.ignite.internal.processors.hadoop.HadoopClassLoader)
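
The COMMIT and ABORT submissions above pass positional arguments to the HadoopTaskInfo constructor. The sketch below spells out the argument roles as inferred from the accessors shown in Example 1 (type(), jobId(), taskNumber(), attempt()) and from the split argument used for map tasks; the variable names are assumptions, not the declared parameter names, and the snippet reuses the same static import of COMMIT and the meta, job and ctx references of the surrounding method:

// Sketch: argument roles inferred from the surrounding examples, not from the declared signature.
HadoopJobId jobId = meta.jobId();    // job the task belongs to
int taskNum = 0;                     // assumed: a job has a single COMMIT task, so it is numbered 0
int attempt = 0;                     // assumed: first (and only) attempt
HadoopInputSplit split = null;       // COMMIT and ABORT tasks read no input split

HadoopTaskInfo commitInfo = new HadoopTaskInfo(COMMIT, jobId, taskNum, attempt, split);
ctx.taskExecutor().run(job, Collections.singletonList(commitInfo));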

Example 3 with HadoopTaskInfo

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache: class HadoopJobTracker, method reducerTasks().

/**
 * Creates reducer tasks based on job information.
 *
 * @param reducers Reducers (may be {@code null}).
 * @param job Job instance.
 * @return Collection of created task infos or {@code null} if no reducer tasks scheduled for local node.
 */
private Collection<HadoopTaskInfo> reducerTasks(int[] reducers, HadoopJobEx job) {
    UUID locNodeId = ctx.localNodeId();
    HadoopJobId jobId = job.id();
    JobLocalState state = activeJobs.get(jobId);
    Collection<HadoopTaskInfo> tasks = null;
    if (reducers != null) {
        if (state == null)
            state = initState(job.id());
        for (int rdc : reducers) {
            if (state.addReducer(rdc)) {
                if (log.isDebugEnabled())
                    log.debug("Submitting REDUCE task for execution [locNodeId=" + locNodeId + ", rdc=" + rdc + ']');
                HadoopTaskInfo taskInfo = new HadoopTaskInfo(REDUCE, jobId, rdc, 0, null);
                if (tasks == null)
                    tasks = new ArrayList<>();
                tasks.add(taskInfo);
            }
        }
    }
    return tasks;
}
Also used: HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo), ArrayList (java.util.ArrayList), UUID (java.util.UUID), HadoopJobId (org.apache.ignite.internal.processors.hadoop.HadoopJobId)

Example 4 with HadoopTaskInfo

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache: class HadoopJobTracker, method mapperTasks().

/**
 * Creates mapper tasks based on job information.
 *
 * @param mappers Mapper blocks.
 * @param meta Job metadata.
 * @return Collection of created task infos or {@code null} if no mapper tasks scheduled for local node.
 */
private Collection<HadoopTaskInfo> mapperTasks(Iterable<HadoopInputSplit> mappers, HadoopJobMetadata meta) {
    UUID locNodeId = ctx.localNodeId();
    HadoopJobId jobId = meta.jobId();
    JobLocalState state = activeJobs.get(jobId);
    Collection<HadoopTaskInfo> tasks = null;
    if (mappers != null) {
        if (state == null)
            state = initState(jobId);
        int mapperIdx = 0;
        for (HadoopInputSplit split : mappers) {
            if (state.addMapper(split)) {
                if (log.isDebugEnabled())
                    log.debug("Submitting MAP task for execution [locNodeId=" + locNodeId + ", split=" + split + ']');
                HadoopTaskInfo taskInfo = new HadoopTaskInfo(MAP, jobId, meta.taskNumber(split), 0, split);
                taskInfo.mapperIndex(mapperIdx++);
                if (tasks == null)
                    tasks = new ArrayList<>();
                tasks.add(taskInfo);
            }
        }
    }
    return tasks;
}
Also used: HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo), ArrayList (java.util.ArrayList), HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit), UUID (java.util.UUID), HadoopJobId (org.apache.ignite.internal.processors.hadoop.HadoopJobId)
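
A MAP task's info carries two different indices, which is easy to miss in the loop above. A short annotated sketch, reusing the same variables as mapperTasks(); the interpretation of the two numbers is a reading of this code, not a documented contract:

// Sketch: the two indices attached to a MAP task info (interpretation inferred from mapperTasks()).
HadoopTaskInfo taskInfo = new HadoopTaskInfo(MAP, jobId, meta.taskNumber(split), 0, split);
// meta.taskNumber(split): number assigned to this split in the job metadata (assumed consistent across nodes).
// mapperIdx: 0-based position of this mapper among the mappers scheduled on the local node.
taskInfo.mapperIndex(mapperIdx++);
tasks.add(taskInfo);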

Example 5 with HadoopTaskInfo

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache: class HadoopChildProcessRunner, method onTaskFinished0().

/**
 * Notifies node about task finish.
 *
 * @param run Finished task runnable.
 * @param status Task status.
 */
private void onTaskFinished0(HadoopRunnableTask run, HadoopTaskStatus status) {
    HadoopTaskInfo info = run.taskInfo();
    int pendingTasks0 = pendingTasks.decrementAndGet();
    if (log.isDebugEnabled())
        log.debug("Hadoop task execution finished [info=" + info + ", state=" + status.state() + ", waitTime=" + run.waitTime() + ", execTime=" + run.executionTime() + ", pendingTasks=" + pendingTasks0 + ", err=" + status.failCause() + ']');
    assert info.type() == MAP || info.type() == REDUCE : "Only MAP or REDUCE tasks are supported.";
    boolean flush = pendingTasks0 == 0 && info.type() == MAP;
    notifyTaskFinished(info, status, flush);
}
Also used: HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo)

Aggregations

HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo) 14
HadoopJobEx (org.apache.ignite.internal.processors.hadoop.HadoopJobEx) 5
UUID (java.util.UUID) 4
HadoopJobId (org.apache.ignite.internal.processors.hadoop.HadoopJobId) 4
ArrayList (java.util.ArrayList) 3
URI (java.net.URI) 2
Collection (java.util.Collection) 2
IgniteCheckedException (org.apache.ignite.IgniteCheckedException) 2
HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit) 2
HadoopMapReducePlan (org.apache.ignite.hadoop.HadoopMapReducePlan) 2
IgfsPath (org.apache.ignite.igfs.IgfsPath) 2
HadoopFileBlock (org.apache.ignite.internal.processors.hadoop.HadoopFileBlock) 2
HadoopTaskCancelledException (org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException) 2
HadoopTaskContext (org.apache.ignite.internal.processors.hadoop.HadoopTaskContext) 2
ByteArrayInputStream (java.io.ByteArrayInputStream) 1
DataInput (java.io.DataInput) 1
DataInputStream (java.io.DataInputStream) 1
PrintWriter (java.io.PrintWriter) 1
MutableEntry (javax.cache.processor.MutableEntry) 1
IntWritable (org.apache.hadoop.io.IntWritable) 1