use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache.
the class HadoopChildProcessRunner method onTaskFinished0.
/**
 * Notifies node about task finish.
 *
 * @param run Finished task runnable.
 * @param status Task status.
 */
private void onTaskFinished0(HadoopRunnableTask run, HadoopTaskStatus status) {
    HadoopTaskInfo info = run.taskInfo();

    int pendingTasks0 = pendingTasks.decrementAndGet();

    if (log.isDebugEnabled())
        log.debug("Hadoop task execution finished [info=" + info + ", state=" + status.state() +
            ", waitTime=" + run.waitTime() + ", execTime=" + run.executionTime() +
            ", pendingTasks=" + pendingTasks0 + ", err=" + status.failCause() + ']');

    assert info.type() == MAP || info.type() == REDUCE : "Only MAP or REDUCE tasks are supported.";

    boolean flush = pendingTasks0 == 0 && info.type() == MAP;

    notifyTaskFinished(info, status, flush);
}
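The part worth calling out is the pending-task bookkeeping: the runner decrements a shared counter and asks for a shuffle flush only when the finished task was a map and no tasks remain pending. A minimal standalone sketch of that pattern is below; the class and method names are illustrative, not Ignite's actual API.

import java.util.concurrent.atomic.AtomicInteger;

/** Illustrative sketch of the "flush on the last finished map" pattern (not Ignite code). */
class PendingTaskTracker {
    /** Tasks submitted but not yet finished. */
    private final AtomicInteger pendingTasks = new AtomicInteger();

    /** Called when a task is submitted for execution. */
    void onTaskSubmitted() {
        pendingTasks.incrementAndGet();
    }

    /**
     * Called when a task finishes.
     *
     * @param isMap Whether the finished task was a map task.
     * @return {@code true} if the shuffle output should be flushed now.
     */
    boolean onTaskFinished(boolean isMap) {
        int left = pendingTasks.decrementAndGet();

        // Flush only when the last pending task completes and it was a map:
        // at that point all map output produced by this process is ready to ship.
        return left == 0 && isMap;
    }
}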
use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache.
the class HadoopV2JobSelfTest method testCustomSerializationApplying.
/**
 * Tests that {@link HadoopJobEx} provides wrapped serializer if it's set in configuration.
 *
 * @throws IgniteCheckedException If fails.
 */
public void testCustomSerializationApplying() throws IgniteCheckedException {
    JobConf cfg = new JobConf();

    cfg.setMapOutputKeyClass(IntWritable.class);
    cfg.setMapOutputValueClass(Text.class);
    cfg.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, CustomSerialization.class.getName());

    HadoopDefaultJobInfo info = createJobInfo(cfg, null);

    final UUID uuid = UUID.randomUUID();

    HadoopJobId id = new HadoopJobId(uuid, 1);

    HadoopJobEx job = info.createJob(HadoopV2Job.class, id, log, null, new HadoopHelperImpl());

    HadoopTaskContext taskCtx = job.getTaskContext(new HadoopTaskInfo(HadoopTaskType.MAP, null, 0, 0, null));

    HadoopSerialization ser = taskCtx.keySerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());

    DataInput in = new DataInputStream(new ByteArrayInputStream(new byte[0]));

    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());

    ser = taskCtx.valueSerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());

    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());
}
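The serializer is registered through the standard Hadoop mechanism: CommonConfigurationKeys.IO_SERIALIZATIONS_KEY holds a comma-separated list of org.apache.hadoop.io.serializer.Serialization implementations. The CustomSerialization class itself is not included in this excerpt; judging by the assertions above, it plausibly looks something like the sketch below, which deserializes every Writable to a fixed marker Text. The field value and the exact structure are assumptions, and the real test class may be written differently (for instance by extending WritableSerialization).

import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.Serialization;
import org.apache.hadoop.io.serializer.Serializer;

/** Hypothetical stand-in for the test's CustomSerialization: every read yields a fixed marker value. */
public class CustomSerialization implements Serialization<Writable> {
    /** Marker the assertions check for; the real test defines its own value. */
    static final String TEST_SERIALIZED_VALUE = "custom-serialization-used";

    /** {@inheritDoc} */
    @Override public boolean accept(Class<?> c) {
        return Writable.class.isAssignableFrom(c);
    }

    /** {@inheritDoc} */
    @Override public Serializer<Writable> getSerializer(Class<Writable> c) {
        return new Serializer<Writable>() {
            @Override public void open(OutputStream out) { /* No-op: the test never serializes. */ }

            @Override public void serialize(Writable w) { /* No-op. */ }

            @Override public void close() { /* No-op. */ }
        };
    }

    /** {@inheritDoc} */
    @Override public Deserializer<Writable> getDeserializer(Class<Writable> c) {
        return new Deserializer<Writable>() {
            @Override public void open(InputStream in) { /* The test passes an empty stream; ignore it. */ }

            @Override public Writable deserialize(Writable w) {
                // Ignore the input entirely and return the marker so the test can tell this class was used.
                return new Text(TEST_SERIALIZED_VALUE);
            }

            @Override public void close() { /* No-op. */ }
        };
    }
}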
use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache.
the class HadoopTasksAllVersionsTest method runTaskWithInput.
/**
 * Generates input data for a reduce-like operation into the mock context input and runs the operation.
 *
 * @param gridJob Job to create the reduce task from.
 * @param taskType Type of task: combine or reduce.
 * @param taskNum Number of the task within the job.
 * @param words Pairs of words and their counts.
 * @return Context with mock output.
 * @throws IgniteCheckedException If fails.
 */
private HadoopTestTaskContext runTaskWithInput(HadoopJobEx gridJob, HadoopTaskType taskType, int taskNum,
    String... words) throws IgniteCheckedException {
    HadoopTaskInfo taskInfo = new HadoopTaskInfo(taskType, gridJob.id(), taskNum, 0, null);

    HadoopTestTaskContext ctx = new HadoopTestTaskContext(taskInfo, gridJob);

    for (int i = 0; i < words.length; i += 2) {
        List<IntWritable> valList = new ArrayList<>();

        for (int j = 0; j < Integer.parseInt(words[i + 1]); j++)
            valList.add(new IntWritable(1));

        ctx.mockInput().put(new Text(words[i]), valList);
    }

    ctx.run();

    return ctx;
}
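For context, a caller passes the word/count pairs as flat varargs of strings. The invocation below is illustrative only; the concrete test methods that use this helper are not part of this excerpt.

/** Illustrative only: how a combiner test might drive the helper above (not in the original test class). */
private void exampleCombineInvocation(HadoopJobEx gridJob) throws IgniteCheckedException {
    // "red" appears 10 times and "blue" 5 times; counts are passed as strings because of the varargs format.
    HadoopTestTaskContext combineCtx = runTaskWithInput(gridJob, HadoopTaskType.COMBINE, 0, "red", "10", "blue", "5");

    // Assertions against combineCtx.mockOutput() would follow here.
}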
use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache.
the class HadoopTasksAllVersionsTest method runMapCombineTask.
/**
 * Runs a map-combine task chain on a file block.
 *
 * @param fileBlock Block of the input file to be processed.
 * @param gridJob Hadoop job implementation.
 * @return Context of the combine task with mock output.
 * @throws IgniteCheckedException If fails.
 */
private HadoopTestTaskContext runMapCombineTask(HadoopFileBlock fileBlock, HadoopJobEx gridJob)
    throws IgniteCheckedException {
    HadoopTaskInfo taskInfo = new HadoopTaskInfo(HadoopTaskType.MAP, gridJob.id(), 0, 0, fileBlock);

    HadoopTestTaskContext mapCtx = new HadoopTestTaskContext(taskInfo, gridJob);

    mapCtx.run();

    // Prepare input for combine: feed the map task's mock output into the combine context.
    taskInfo = new HadoopTaskInfo(HadoopTaskType.COMBINE, gridJob.id(), 0, 0, null);

    HadoopTestTaskContext combineCtx = new HadoopTestTaskContext(taskInfo, gridJob);

    combineCtx.makeTreeOfWritables(mapCtx.mockOutput());

    combineCtx.run();

    return combineCtx;
}
use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo in project ignite by apache.
the class HadoopTasksAllVersionsTest method testAllTasks.
/**
 * Tests the whole job end to end: runs 2 chains of map-combine tasks and sends the result into one reduce task.
 *
 * @throws Exception If fails.
 */
@SuppressWarnings("ConstantConditions")
public void testAllTasks() throws Exception {
    IgfsPath inDir = new IgfsPath(PATH_INPUT);

    igfs.mkdirs(inDir);

    IgfsPath inFile = new IgfsPath(inDir, HadoopWordCount2.class.getSimpleName() + "-input");

    URI inFileUri = URI.create(igfsScheme() + inFile.toString());

    generateTestFile(inFile.toString(), "red", 100, "blue", 200, "green", 150, "yellow", 70);

    // Split the input file into two blocks.
    long fileLen = igfs.info(inFile).length();

    Long l = fileLen / 2;

    HadoopFileBlock fileBlock1 = new HadoopFileBlock(HOSTS, inFileUri, 0, l);
    HadoopFileBlock fileBlock2 = new HadoopFileBlock(HOSTS, inFileUri, l, fileLen - l);

    HadoopJobEx gridJob = getHadoopJob(inFileUri.toString(), igfsScheme() + PATH_OUTPUT);

    HadoopTestTaskContext combine1Ctx = runMapCombineTask(fileBlock1, gridJob);
    HadoopTestTaskContext combine2Ctx = runMapCombineTask(fileBlock2, gridJob);

    // Prepare input for the reduce task from both combiners' mock output.
    HadoopTaskInfo taskInfo = new HadoopTaskInfo(HadoopTaskType.REDUCE, gridJob.id(), 0, 0, null);

    HadoopTestTaskContext reduceCtx = new HadoopTestTaskContext(taskInfo, gridJob);

    reduceCtx.makeTreeOfWritables(combine1Ctx.mockOutput());
    reduceCtx.makeTreeOfWritables(combine2Ctx.mockOutput());

    reduceCtx.run();

    reduceCtx.taskInfo(new HadoopTaskInfo(HadoopTaskType.COMMIT, gridJob.id(), 0, 0, null));

    reduceCtx.run();

    assertEquals("blue\t200\n" + "green\t150\n" + "red\t100\n" + "yellow\t70\n",
        readAndSortFile(PATH_OUTPUT + "/" + getOutputFileNamePrefix() + "00000"));
}
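The generateTestFile helper is not shown in this excerpt. Given the expected reduce output ("blue\t200" and so on), it presumably writes each word the requested number of times so the word count comes out exactly as asserted. A sketch of such a generator is below; the method name, signature, and one-word-per-line layout are assumptions.

import java.io.OutputStream;
import java.io.PrintWriter;

/**
 * Hypothetical sketch of a word-count input generator: writes each word {@code count} times, one per line.
 * The real helper in the test suite targets an IGFS path and may format the file differently.
 */
private void generateTestFileSketch(OutputStream out, Object... wordsAndCounts) {
    try (PrintWriter writer = new PrintWriter(out)) {
        for (int i = 0; i < wordsAndCounts.length; i += 2) {
            String word = (String)wordsAndCounts[i];
            int cnt = (Integer)wordsAndCounts[i + 1];

            for (int j = 0; j < cnt; j++)
                writer.println(word);
        }
    }
}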