Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopTasksAllVersionsTest, method testReduceTask.
/**
* Tests reduce task execution.
*
* @throws Exception If the test fails.
*/
public void testReduceTask() throws Exception {
    HadoopJobEx gridJob = getHadoopJob(igfsScheme() + PATH_INPUT, igfsScheme() + PATH_OUTPUT);

    runTaskWithInput(gridJob, HadoopTaskType.REDUCE, 0, "word1", "5", "word2", "10");
    runTaskWithInput(gridJob, HadoopTaskType.REDUCE, 1, "word3", "7", "word4", "15");

    assertEquals("word1\t5\n" + "word2\t10\n",
        readAndSortFile(PATH_OUTPUT + "/_temporary/0/task_00000000-0000-0000-0000-000000000000_0000_r_000000/"
            + getOutputFileNamePrefix() + "00000"));

    assertEquals("word3\t7\n" + "word4\t15\n",
        readAndSortFile(PATH_OUTPUT + "/_temporary/0/task_00000000-0000-0000-0000-000000000000_0000_r_000001/"
            + getOutputFileNamePrefix() + "00001"));
}
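The expected strings above are produced by the test helper readAndSortFile, which is not shown on this page. Below is a minimal sketch of what such a helper could look like, assuming it opens the IGFS file, sorts its lines, and returns them newline-terminated; it relies on the test's igfs field and Guava's Joiner, which the other snippets already use, and the actual Ignite helper may differ.

private String readAndSortFile(String path) throws Exception {
    // Read all lines of the given IGFS file.
    List<String> lines = new ArrayList<>();

    try (BufferedReader rdr = new BufferedReader(new InputStreamReader(igfs.open(new IgfsPath(path))))) {
        for (String line = rdr.readLine(); line != null; line = rdr.readLine())
            lines.add(line);
    }

    // Sort so the comparison does not depend on the order in which the reducer wrote the keys.
    Collections.sort(lines);

    return Joiner.on('\n').join(lines) + '\n';
}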
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopTasksAllVersionsTest, method testAllTasks.
/**
* Tests the complete job flow:
* runs two chains of map-combine tasks and sends their results into a single reduce task.
*
* @throws Exception If the test fails.
*/
@SuppressWarnings("ConstantConditions")
public void testAllTasks() throws Exception {
    IgfsPath inDir = new IgfsPath(PATH_INPUT);

    igfs.mkdirs(inDir);

    IgfsPath inFile = new IgfsPath(inDir, HadoopWordCount2.class.getSimpleName() + "-input");

    URI inFileUri = URI.create(igfsScheme() + inFile.toString());

    generateTestFile(inFile.toString(), "red", 100, "blue", 200, "green", 150, "yellow", 70);

    // Split the input file into two blocks.
    long fileLen = igfs.info(inFile).length();

    Long l = fileLen / 2;

    HadoopFileBlock fileBlock1 = new HadoopFileBlock(HOSTS, inFileUri, 0, l);
    HadoopFileBlock fileBlock2 = new HadoopFileBlock(HOSTS, inFileUri, l, fileLen - l);

    HadoopJobEx gridJob = getHadoopJob(inFileUri.toString(), igfsScheme() + PATH_OUTPUT);

    HadoopTestTaskContext combine1Ctx = runMapCombineTask(fileBlock1, gridJob);
    HadoopTestTaskContext combine2Ctx = runMapCombineTask(fileBlock2, gridJob);

    // Prepare input for the reduce task from the output of both combiners.
    HadoopTaskInfo taskInfo = new HadoopTaskInfo(HadoopTaskType.REDUCE, gridJob.id(), 0, 0, null);

    HadoopTestTaskContext reduceCtx = new HadoopTestTaskContext(taskInfo, gridJob);

    reduceCtx.makeTreeOfWritables(combine1Ctx.mockOutput());
    reduceCtx.makeTreeOfWritables(combine2Ctx.mockOutput());

    reduceCtx.run();

    // Commit the output so it becomes visible under PATH_OUTPUT.
    reduceCtx.taskInfo(new HadoopTaskInfo(HadoopTaskType.COMMIT, gridJob.id(), 0, 0, null));

    reduceCtx.run();

    assertEquals("blue\t200\n" + "green\t150\n" + "red\t100\n" + "yellow\t70\n",
        readAndSortFile(PATH_OUTPUT + "/" + getOutputFileNamePrefix() + "00000"));
}
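runMapCombineTask is another helper of the same test class and is not shown on this page. A plausible sketch of what it could do, mirroring the reduce wiring above, is given below; using HadoopTaskType.COMBINE as the task type and makeTreeOfWritables to feed the map output are assumptions, not the verified implementation.

private HadoopTestTaskContext runMapCombineTask(HadoopFileBlock fileBlock, HadoopJobEx gridJob)
    throws IgniteCheckedException {
    // Run the map task over the given file block.
    HadoopTaskInfo mapTaskInfo = new HadoopTaskInfo(HadoopTaskType.MAP, gridJob.id(), 0, 0, fileBlock);

    HadoopTestTaskContext mapCtx = new HadoopTestTaskContext(mapTaskInfo, gridJob);

    mapCtx.run();

    // Feed the map output into a combine task and run it, returning the combine context
    // so the caller can pass its mockOutput() on to the reduce task.
    HadoopTaskInfo combineTaskInfo = new HadoopTaskInfo(HadoopTaskType.COMBINE, gridJob.id(), 0, 0, null);

    HadoopTestTaskContext combineCtx = new HadoopTestTaskContext(combineTaskInfo, gridJob);

    combineCtx.makeTreeOfWritables(mapCtx.mockOutput());

    combineCtx.run();

    return combineCtx;
}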
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopTasksAllVersionsTest, method testMapTask.
/**
* Tests map task execution.
*
* @throws Exception If the test fails.
*/
@SuppressWarnings("ConstantConditions")
public void testMapTask() throws Exception {
    IgfsPath inDir = new IgfsPath(PATH_INPUT);

    igfs.mkdirs(inDir);

    IgfsPath inFile = new IgfsPath(inDir, HadoopWordCount2.class.getSimpleName() + "-input");

    URI inFileUri = URI.create(igfsScheme() + inFile.toString());

    try (PrintWriter pw = new PrintWriter(igfs.create(inFile, true))) {
        pw.println("hello0 world0");
        pw.println("world1 hello1");
    }

    HadoopFileBlock fileBlock1 = new HadoopFileBlock(HOSTS, inFileUri, 0, igfs.info(inFile).length() - 1);

    try (PrintWriter pw = new PrintWriter(igfs.append(inFile, false))) {
        pw.println("hello2 world2");
        pw.println("world3 hello3");
    }

    HadoopFileBlock fileBlock2 = new HadoopFileBlock(HOSTS, inFileUri, fileBlock1.length(),
        igfs.info(inFile).length() - fileBlock1.length());

    HadoopJobEx gridJob = getHadoopJob(igfsScheme() + inFile.toString(), igfsScheme() + PATH_OUTPUT);

    HadoopTaskInfo taskInfo = new HadoopTaskInfo(HadoopTaskType.MAP, gridJob.id(), 0, 0, fileBlock1);

    HadoopTestTaskContext ctx = new HadoopTestTaskContext(taskInfo, gridJob);

    ctx.mockOutput().clear();

    ctx.run();

    assertEquals("hello0,1; world0,1; world1,1; hello1,1", Joiner.on("; ").join(ctx.mockOutput()));

    ctx.mockOutput().clear();

    ctx.taskInfo(new HadoopTaskInfo(HadoopTaskType.MAP, gridJob.id(), 0, 0, fileBlock2));

    ctx.run();

    assertEquals("hello2,1; world2,1; world3,1; hello3,1", Joiner.on("; ").join(ctx.mockOutput()));
}
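The block boundaries above look odd at first glance: fileBlock1 is one byte shorter than the data written so far. Presumably this accounts for Hadoop's line record reader, which skips everything up to and including the first newline of any split that does not start at offset 0; ending the first block one byte early makes the second split start on the final newline of the first half, so only that single byte gets skipped and no whole line is lost. A worked example of the arithmetic, assuming a single-byte line terminator (the actual lengths depend on the platform line separator used by println):

// Each line "helloN worldN" plus a single-byte terminator is 14 bytes.
long firstHalfLen = 2 * "hello0 world0\n".length();   // 28 bytes after the first write

long block1Len = firstHalfLen - 1;                    // fileBlock1 covers bytes [0, 27)

long totalLen = 2 * firstHalfLen;                     // 56 bytes after the append

long block2Off = block1Len;                           // fileBlock2 starts at byte 27 ...
long block2Len = totalLen - block1Len;                // ... and covers the remaining 29 bytes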
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopJobTracker, method processNodeLeft.
/**
* Processes a node leave (or failure) event.
*
* @param evt Discovery event.
*/
@SuppressWarnings("ConstantConditions")
private void processNodeLeft(DiscoveryEvent evt) {
    if (log.isDebugEnabled())
        log.debug("Processing discovery event [locNodeId=" + ctx.localNodeId() + ", evt=" + evt + ']');

    // Check only if this node is responsible for job status updates.
    if (ctx.jobUpdateLeader()) {
        boolean checkSetup = evt.eventNode().order() < ctx.localNodeOrder();

        Iterable<IgniteCache.Entry<HadoopJobId, HadoopJobMetadata>> entries;

        try {
            entries = jobMetaCache().localEntries(OFFHEAP_PEEK_MODE);
        }
        catch (IgniteCheckedException e) {
            U.error(log, "Failed to get local entries", e);

            return;
        }

        // Iteration over all local entries is correct since the system cache is REPLICATED.
        for (IgniteCache.Entry<HadoopJobId, HadoopJobMetadata> entry : entries) {
            HadoopJobMetadata meta = entry.getValue();

            HadoopJobId jobId = meta.jobId();
            HadoopMapReducePlan plan = meta.mapReducePlan();
            HadoopJobPhase phase = meta.phase();

            try {
                if (checkSetup && phase == PHASE_SETUP && !activeJobs.containsKey(jobId)) {
                    // Failover setup task.
                    HadoopJobEx job = job(jobId, meta.jobInfo());

                    Collection<HadoopTaskInfo> setupTask = setupTask(jobId);

                    assert setupTask != null;

                    ctx.taskExecutor().run(job, setupTask);
                }
                else if (phase == PHASE_MAP || phase == PHASE_REDUCE) {
                    // Check all plan nodes, not only the node from the event, since several nodes may have failed.
                    Collection<HadoopInputSplit> cancelSplits = null;

                    for (UUID nodeId : plan.mapperNodeIds()) {
                        if (ctx.kernalContext().discovery().node(nodeId) == null) {
                            // Node has left the grid.
                            Collection<HadoopInputSplit> mappers = plan.mappers(nodeId);

                            if (cancelSplits == null)
                                cancelSplits = new HashSet<>();

                            cancelSplits.addAll(mappers);
                        }
                    }

                    Collection<Integer> cancelReducers = null;

                    for (UUID nodeId : plan.reducerNodeIds()) {
                        if (ctx.kernalContext().discovery().node(nodeId) == null) {
                            // Node has left the grid.
                            int[] reducers = plan.reducers(nodeId);

                            if (cancelReducers == null)
                                cancelReducers = new HashSet<>();

                            for (int rdc : reducers)
                                cancelReducers.add(rdc);
                        }
                    }

                    if (cancelSplits != null || cancelReducers != null)
                        jobMetaCache().invoke(meta.jobId(), new CancelJobProcessor(null,
                            new IgniteCheckedException("One or more nodes participating in map-reduce job execution failed."),
                            cancelSplits, cancelReducers));
                }
            }
            catch (IgniteCheckedException e) {
                U.error(log, "Failed to cancel job: " + meta, e);
            }
        }
    }
}
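processNodeLeft is driven by discovery events. The snippet below is a minimal sketch of how such a handler is typically attached to topology events through the kernal context; it is not the actual HadoopJobTracker wiring, which may register its listener differently.

ctx.kernalContext().event().addLocalEventListener(new GridLocalEventListener() {
    @Override public void onEvent(Event evt) {
        // A node that left gracefully and a node that failed are handled the same way.
        processNodeLeft((DiscoveryEvent)evt);
    }
}, EventType.EVT_NODE_LEFT, EventType.EVT_NODE_FAILED);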
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobEx in project ignite by apache.
The class HadoopJobTracker, method job.
/**
* Gets or creates a job instance.
*
* @param jobId Job ID.
* @param jobInfo Job info.
* @return Job.
* @throws IgniteCheckedException If failed.
*/
@Nullable public HadoopJobEx job(HadoopJobId jobId, @Nullable HadoopJobInfo jobInfo) throws IgniteCheckedException {
    GridFutureAdapter<HadoopJobEx> fut = jobs.get(jobId);

    // If another thread has already installed a future for this job ID, simply wait for its result.
    if (fut != null || (fut = jobs.putIfAbsent(jobId, new GridFutureAdapter<HadoopJobEx>())) != null)
        return fut.get();

    // This thread won the race and owns the freshly installed future.
    fut = jobs.get(jobId);

    HadoopJobEx job = null;

    try {
        if (jobInfo == null) {
            HadoopJobMetadata meta = jobMetaCache().get(jobId);

            if (meta == null)
                throw new IgniteCheckedException("Failed to find job metadata for ID: " + jobId);

            jobInfo = meta.jobInfo();
        }

        job = jobInfo.createJob(jobCls, jobId, log, ctx.configuration().getNativeLibraryNames(),
            ctx.kernalContext().hadoopHelper());

        job.initialize(false, ctx.localNodeId());

        fut.onDone(job);

        return job;
    }
    catch (IgniteCheckedException e) {
        // Propagate the failure to waiters, remove the future so the job can be retried,
        // and dispose the partially created job, if any.
        fut.onDone(e);

        jobs.remove(jobId, fut);

        if (job != null) {
            try {
                job.dispose(false);
            }
            catch (IgniteCheckedException e0) {
                U.error(log, "Failed to dispose job: " + jobId, e0);
            }
        }

        throw e;
    }
}
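The putIfAbsent-plus-future idiom above guarantees that only one thread constructs and initializes the job for a given ID while concurrent callers block on the same future; on failure the future is removed so a later call can retry. Below is a compact sketch of the same idiom using only JDK types; createJob is a hypothetical factory standing in for jobInfo.createJob(...).

ConcurrentMap<HadoopJobId, CompletableFuture<HadoopJobEx>> jobs = new ConcurrentHashMap<>();

HadoopJobEx job(HadoopJobId jobId) throws Exception {
    CompletableFuture<HadoopJobEx> fut = jobs.get(jobId);

    // If another thread has already installed a future, just wait for its result.
    if (fut != null || (fut = jobs.putIfAbsent(jobId, new CompletableFuture<>())) != null)
        return fut.get();

    // This thread won the race: it owns the freshly installed future and must complete it.
    fut = jobs.get(jobId);

    try {
        HadoopJobEx job = createJob(jobId); // hypothetical factory standing in for jobInfo.createJob(...)

        fut.complete(job);

        return job;
    }
    catch (Exception e) {
        // Propagate the failure to waiters and make room for a retry.
        fut.completeExceptionally(e);

        jobs.remove(jobId, fut);

        throw e;
    }
}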