Example 6 with HadoopInputSplit

Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.

The class HadoopJobTracker, method processJobMetaUpdate.

/**
 * @param jobId Job ID.
 * @param meta Job metadata.
 * @param locNodeId Local node ID.
 * @throws IgniteCheckedException If failed.
 */
private void processJobMetaUpdate(HadoopJobId jobId, HadoopJobMetadata meta, UUID locNodeId) throws IgniteCheckedException {
    JobLocalState state = activeJobs.get(jobId);
    HadoopJobEx job = job(jobId, meta.jobInfo());
    HadoopMapReducePlan plan = meta.mapReducePlan();
    switch(meta.phase()) {
        case PHASE_SETUP:
            {
                if (ctx.jobUpdateLeader()) {
                    Collection<HadoopTaskInfo> setupTask = setupTask(jobId);
                    if (setupTask != null)
                        ctx.taskExecutor().run(job, setupTask);
                }
                break;
            }
        case PHASE_MAP:
            {
                // Check if we should initiate a new task on the local node.
                Collection<HadoopTaskInfo> tasks = mapperTasks(plan.mappers(locNodeId), meta);
                if (tasks != null)
                    ctx.taskExecutor().run(job, tasks);
                break;
            }
        case PHASE_REDUCE:
            {
                if (meta.pendingReducers().isEmpty() && ctx.jobUpdateLeader()) {
                    HadoopTaskInfo info = new HadoopTaskInfo(COMMIT, jobId, 0, 0, null);
                    if (log.isDebugEnabled())
                        log.debug("Submitting COMMIT task for execution [locNodeId=" + locNodeId + ", jobId=" + jobId + ']');
                    ctx.taskExecutor().run(job, Collections.singletonList(info));
                    break;
                }
                Collection<HadoopTaskInfo> tasks = reducerTasks(plan.reducers(locNodeId), job);
                if (tasks != null)
                    ctx.taskExecutor().run(job, tasks);
                break;
            }
        case PHASE_CANCELLING:
            {
                // Prevent multiple task executor notifications.
                if (state != null && state.onCancel()) {
                    if (log.isDebugEnabled())
                        log.debug("Cancelling local task execution for job: " + meta);
                    ctx.taskExecutor().cancelTasks(jobId);
                }
                if (meta.pendingSplits().isEmpty() && meta.pendingReducers().isEmpty()) {
                    if (ctx.jobUpdateLeader()) {
                        if (state == null)
                            state = initState(jobId);
                        // Prevent running multiple abort tasks.
                        if (state.onAborted()) {
                            HadoopTaskInfo info = new HadoopTaskInfo(ABORT, jobId, 0, 0, null);
                            if (log.isDebugEnabled())
                                log.debug("Submitting ABORT task for execution [locNodeId=" + locNodeId + ", jobId=" + jobId + ']');
                            ctx.taskExecutor().run(job, Collections.singletonList(info));
                        }
                    }
                    break;
                } else {
                    // Check if there are unscheduled mappers or reducers.
                    Collection<HadoopInputSplit> cancelMappers = new ArrayList<>();
                    Collection<Integer> cancelReducers = new ArrayList<>();
                    Collection<HadoopInputSplit> mappers = plan.mappers(ctx.localNodeId());
                    if (mappers != null) {
                        for (HadoopInputSplit b : mappers) {
                            if (state == null || !state.mapperScheduled(b))
                                cancelMappers.add(b);
                        }
                    }
                    int[] rdc = plan.reducers(ctx.localNodeId());
                    if (rdc != null) {
                        for (int r : rdc) {
                            if (state == null || !state.reducerScheduled(r))
                                cancelReducers.add(r);
                        }
                    }
                    if (!cancelMappers.isEmpty() || !cancelReducers.isEmpty())
                        transform(jobId, new CancelJobProcessor(null, cancelMappers, cancelReducers));
                }
                break;
            }
        case PHASE_COMPLETE:
            {
                if (log.isDebugEnabled())
                    log.debug("Job execution is complete, will remove local state from active jobs " + "[jobId=" + jobId + ", meta=" + meta + ']');
                if (state != null) {
                    state = activeJobs.remove(jobId);
                    assert state != null;
                    ctx.shuffle().jobFinished(jobId);
                }
                GridFutureAdapter<HadoopJobId> finishFut = activeFinishFuts.remove(jobId);
                if (finishFut != null) {
                    if (log.isDebugEnabled())
                        log.debug("Completing job future [locNodeId=" + locNodeId + ", meta=" + meta + ']');
                    finishFut.onDone(jobId, meta.failCause());
                }
                assert job != null;
                if (ctx.jobUpdateLeader())
                    job.cleanupStagingDirectory();
                jobs.remove(jobId);
                if (ctx.jobUpdateLeader()) {
                    ClassLoader ldr = job.getClass().getClassLoader();
                    try {
                        String statWriterClsName = job.info().property(HadoopCommonUtils.JOB_COUNTER_WRITER_PROPERTY);
                        if (statWriterClsName != null) {
                            Class<?> cls = ldr.loadClass(statWriterClsName);
                            HadoopCounterWriter writer = (HadoopCounterWriter) cls.newInstance();
                            HadoopCounters cntrs = meta.counters();
                            writer.write(job, cntrs);
                        }
                    } catch (Exception e) {
                        log.error("Can't write statistic due to: ", e);
                    }
                }
                job.dispose(false);
                break;
            }
        default:
            throw new IllegalStateException("Unknown phase: " + meta.phase());
    }
}
Also used: HadoopCounterWriter (org.apache.ignite.internal.processors.hadoop.counter.HadoopCounterWriter), HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit), IgniteCheckedException (org.apache.ignite.IgniteCheckedException), HadoopTaskCancelledException (org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException), HadoopMapReducePlan (org.apache.ignite.hadoop.HadoopMapReducePlan), HadoopJobEx (org.apache.ignite.internal.processors.hadoop.HadoopJobEx), HadoopCounters (org.apache.ignite.internal.processors.hadoop.counter.HadoopCounters), HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo), GridFutureAdapter (org.apache.ignite.internal.util.future.GridFutureAdapter), Collection (java.util.Collection), HadoopClassLoader (org.apache.ignite.internal.processors.hadoop.HadoopClassLoader)
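
The PHASE_COMPLETE branch above loads a counter writer class reflectively from the job property referenced by HadoopCommonUtils.JOB_COUNTER_WRITER_PROPERTY and invokes its write method. Below is a minimal sketch of what such a writer might look like; the class name and the logging sink are hypothetical, and the write(HadoopJobEx, HadoopCounters) signature is assumed solely from the call site in processJobMetaUpdate, not from the published interface.

import org.apache.ignite.internal.processors.hadoop.HadoopJobEx;
import org.apache.ignite.internal.processors.hadoop.counter.HadoopCounterWriter;
import org.apache.ignite.internal.processors.hadoop.counter.HadoopCounters;

/** Hypothetical counter writer; only the write(...) call made by processJobMetaUpdate is relied upon. */
public class LoggingCounterWriter implements HadoopCounterWriter {
    /** Prints the final job counters once the job reaches PHASE_COMPLETE. */
    public void write(HadoopJobEx job, HadoopCounters cntrs) {
        System.out.println("Job completed, counters: " + cntrs);
    }
}

To be picked up by the code above, such a class would be registered by setting the JOB_COUNTER_WRITER_PROPERTY job property to its fully qualified name, which is exactly what the ldr.loadClass(...) / newInstance() sequence expects.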

Example 7 with HadoopInputSplit

Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.

The class HadoopWeightedMapReducePlannerTest, method testOneIgfsSplitAffinity.

/**
 * Test one IGFS split being assigned to an affinity node.
 *
 * @throws Exception If failed.
 */
public void testOneIgfsSplitAffinity() throws Exception {
    IgfsMock igfs = LocationsBuilder.create().add(0, NODE_1).add(50, NODE_2).add(100, NODE_3).buildIgfs();
    List<HadoopInputSplit> splits = new ArrayList<>();
    splits.add(new HadoopFileBlock(new String[] { HOST_1 }, URI.create("igfs://igfs@/file"), 0, 50));
    final int expReducers = 4;
    HadoopPlannerMockJob job = new HadoopPlannerMockJob(splits, expReducers);
    IgniteHadoopWeightedMapReducePlanner planner = createPlanner(igfs);
    HadoopMapReducePlan plan = planner.preparePlan(job, NODES, null);
    assert plan.mappers() == 1;
    assert plan.mapperNodeIds().size() == 1;
    assert plan.mapperNodeIds().contains(ID_1);
    checkPlanMappers(plan, splits, NODES, false);
    checkPlanReducers(plan, NODES, expReducers, false);
}
Also used: HadoopMapReducePlan (org.apache.ignite.hadoop.HadoopMapReducePlan), IgniteHadoopWeightedMapReducePlanner (org.apache.ignite.hadoop.mapreduce.IgniteHadoopWeightedMapReducePlanner), ArrayList (java.util.ArrayList), IgfsMock (org.apache.ignite.internal.processors.igfs.IgfsMock), HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit), HadoopFileBlock (org.apache.ignite.internal.processors.hadoop.HadoopFileBlock)

Example 8 with HadoopInputSplit

Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.

The class HadoopWeightedMapReducePlannerTest, method testHdfsSplitsReplication.

/**
 * Test HDFS splits with Replication == 3.
 *
 * @throws Exception If failed.
 */
public void testHdfsSplitsReplication() throws Exception {
    IgfsMock igfs = LocationsBuilder.create().add(0, NODE_1).add(50, NODE_2).add(100, NODE_3).buildIgfs();
    final List<HadoopInputSplit> splits = new ArrayList<>();
    splits.add(new HadoopFileBlock(new String[] { HOST_1, HOST_2, HOST_3 }, URI.create("hdfs://" + HOST_1 + "/x"), 0, 50));
    splits.add(new HadoopFileBlock(new String[] { HOST_2, HOST_3, HOST_4 }, URI.create("hdfs://" + HOST_2 + "/x"), 50, 100));
    splits.add(new HadoopFileBlock(new String[] { HOST_3, HOST_4, HOST_5 }, URI.create("hdfs://" + HOST_3 + "/x"), 100, 37));
    // The following splits belong to hosts that are not part of the Ignite topology at all.
    // This means that these splits should be assigned to the least loaded nodes:
    splits.add(new HadoopFileBlock(new String[] { HOST_4, HOST_5, HOST_1 }, URI.create("hdfs://" + HOST_4 + "/x"), 138, 2));
    splits.add(new HadoopFileBlock(new String[] { HOST_5, HOST_1, HOST_2 }, URI.create("hdfs://" + HOST_5 + "/x"), 140, 3));
    final int expReducers = 8;
    HadoopPlannerMockJob job = new HadoopPlannerMockJob(splits, expReducers);
    IgniteHadoopWeightedMapReducePlanner planner = createPlanner(igfs);
    final HadoopMapReducePlan plan = planner.preparePlan(job, NODES, null);
    checkPlanMappers(plan, splits, NODES, true);
    checkPlanReducers(plan, NODES, expReducers, true);
}
Also used: HadoopMapReducePlan (org.apache.ignite.hadoop.HadoopMapReducePlan), IgniteHadoopWeightedMapReducePlanner (org.apache.ignite.hadoop.mapreduce.IgniteHadoopWeightedMapReducePlanner), ArrayList (java.util.ArrayList), IgfsMock (org.apache.ignite.internal.processors.igfs.IgfsMock), HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit), HadoopFileBlock (org.apache.ignite.internal.processors.hadoop.HadoopFileBlock)

Example 9 with HadoopInputSplit

Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.

The class IgniteHadoopWeightedMapReducePlanner, method assignReducersToSplits.

/**
 * Distribute reducers among splits.
 *
 * @param splits Splits.
 * @param reducerCnt Reducer count.
 * @return Map from input split to reducer count.
 */
private Map<HadoopInputSplit, Integer> assignReducersToSplits(Collection<HadoopInputSplit> splits, int reducerCnt) {
    Map<HadoopInputSplit, Integer> res = new IdentityHashMap<>(splits.size());
    int base = reducerCnt / splits.size();
    int remainder = reducerCnt % splits.size();
    for (HadoopInputSplit split : splits) {
        int val = base;
        if (remainder > 0) {
            val++;
            remainder--;
        }
        res.put(split, val);
    }
    assert remainder == 0;
    return res;
}
Also used: IdentityHashMap (java.util.IdentityHashMap), HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit), HadoopIgfsEndpoint (org.apache.ignite.internal.processors.hadoop.igfs.HadoopIgfsEndpoint)
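
assignReducersToSplits is a largest-remainder distribution: every split receives reducerCnt / splits.size() reducers, and the first reducerCnt % splits.size() splits receive one extra, so the counts always sum to reducerCnt. A self-contained sketch of the same arithmetic with plain JDK types is shown below (the ReducerDistributionDemo class is hypothetical); distributing 8 reducers over 5 splits yields 2, 2, 2, 1, 1.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/** Hypothetical standalone demo of the base-plus-remainder distribution used above. */
public class ReducerDistributionDemo {
    public static void main(String[] args) {
        List<String> splits = Arrays.asList("s1", "s2", "s3", "s4", "s5");
        int reducerCnt = 8;

        int base = reducerCnt / splits.size();      // 1 reducer per split as the base
        int remainder = reducerCnt % splits.size(); // 3 leftover reducers

        Map<String, Integer> res = new LinkedHashMap<>();

        for (String split : splits) {
            int val = base;

            if (remainder > 0) { // hand out the leftover reducers one per split
                val++;
                remainder--;
            }

            res.put(split, val);
        }

        System.out.println(res); // {s1=2, s2=2, s3=2, s4=1, s5=1}
    }
}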

Example 10 with HadoopInputSplit

Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.

The class IgniteHadoopWeightedMapReducePlanner, method assignReducers0.

/**
 * Generate reducers.
 *
 * @param top Topology.
 * @param splits Input splits.
 * @param mappers Mappers.
 * @param reducerCnt Reducer count.
 * @return Reducers.
 */
private Map<UUID, Integer> assignReducers0(HadoopMapReducePlanTopology top, Collection<HadoopInputSplit> splits, Mappers mappers, int reducerCnt) {
    Map<UUID, Integer> res = new HashMap<>();
    // Assign reducers to splits.
    Map<HadoopInputSplit, Integer> splitToReducerCnt = assignReducersToSplits(splits, reducerCnt);
    // Assign as many local reducers as possible.
    int remaining = 0;
    for (Map.Entry<HadoopInputSplit, Integer> entry : splitToReducerCnt.entrySet()) {
        HadoopInputSplit split = entry.getKey();
        int cnt = entry.getValue();
        if (cnt > 0) {
            int assigned = assignLocalReducers(split, cnt, top, mappers, res);
            assert assigned <= cnt;
            remaining += cnt - assigned;
        }
    }
    // Assign the remaining reducers.
    if (remaining > 0)
        assignRemoteReducers(remaining, top, mappers, res);
    return res;
}
Also used: HashMap (java.util.HashMap), IdentityHashMap (java.util.IdentityHashMap), HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit), UUID (java.util.UUID), Map (java.util.Map), TreeMap (java.util.TreeMap), HadoopIgfsEndpoint (org.apache.ignite.internal.processors.hadoop.igfs.HadoopIgfsEndpoint)
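
assignReducers0 is a two-pass placement: it first tries to keep each split's reducers local, presumably on the node that runs that split's mapper (assignLocalReducers), and whatever could not be placed locally is handed to assignRemoteReducers in a second pass. Since those helpers are not shown here, the sketch below mimics the pattern with plain JDK types only; the per-node capacity rule, the helper logic, and the class name TwoPassAssignmentDemo are hypothetical stand-ins rather than the planner's actual weighting.

import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

/** Hypothetical illustration of the "local first, then remote" reducer placement pattern. */
public class TwoPassAssignmentDemo {
    /** Maximum reducers a single node accepts in this toy model (not the planner's real rule). */
    private static final int NODE_CAP = 2;

    public static void main(String[] args) {
        UUID node1 = UUID.randomUUID();
        UUID node2 = UUID.randomUUID();

        // Split name -> node hosting that split's mapper (stand-in for the Mappers structure).
        Map<String, UUID> mapperNode = new HashMap<>();
        mapperNode.put("s1", node1);
        mapperNode.put("s2", node1);

        // Split name -> reducers requested for that split (what assignReducersToSplits produces).
        Map<String, Integer> splitToReducerCnt = new HashMap<>();
        splitToReducerCnt.put("s1", 2);
        splitToReducerCnt.put("s2", 3);

        Map<UUID, Integer> res = new HashMap<>();
        int remaining = 0;

        // Pass 1: place reducers next to their split's mapper while the node has spare capacity.
        for (Map.Entry<String, Integer> e : splitToReducerCnt.entrySet()) {
            UUID node = mapperNode.get(e.getKey());
            int assigned = Math.min(e.getValue(), NODE_CAP - res.getOrDefault(node, 0));

            if (assigned > 0)
                res.merge(node, assigned, Integer::sum);

            remaining += e.getValue() - assigned;
        }

        // Pass 2: push whatever did not fit locally onto another node.
        if (remaining > 0)
            res.merge(node2, remaining, Integer::sum);

        System.out.println(res); // node1 ends up with 2 reducers, node2 with 3
    }
}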

Aggregations

HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit): 19 usages
ArrayList (java.util.ArrayList): 8 usages
UUID (java.util.UUID): 8 usages
HadoopFileBlock (org.apache.ignite.internal.processors.hadoop.HadoopFileBlock): 8 usages
IgniteCheckedException (org.apache.ignite.IgniteCheckedException): 7 usages
HashMap (java.util.HashMap): 4 usages
HadoopMapReducePlan (org.apache.ignite.hadoop.HadoopMapReducePlan): 4 usages
Collection (java.util.Collection): 3 usages
Path (org.apache.hadoop.fs.Path): 3 usages
IgniteHadoopWeightedMapReducePlanner (org.apache.ignite.hadoop.mapreduce.IgniteHadoopWeightedMapReducePlanner): 3 usages
HadoopIgfsEndpoint (org.apache.ignite.internal.processors.hadoop.igfs.HadoopIgfsEndpoint): 3 usages
IgfsMock (org.apache.ignite.internal.processors.igfs.IgfsMock): 3 usages
IOException (java.io.IOException): 2 usages
IdentityHashMap (java.util.IdentityHashMap): 2 usages
Map (java.util.Map): 2 usages
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2 usages
FileSplit (org.apache.hadoop.mapred.FileSplit): 2 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 2 usages
FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit): 2 usages
HadoopClassLoader (org.apache.ignite.internal.processors.hadoop.HadoopClassLoader): 2 usages