Example 6 with HadoopFileBlock

use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.

the class IgniteHadoopWeightedMapReducePlanner method igfsAffinityNodesForSplit.

/**
 * Get IGFS affinity nodes for split if possible.
 * <p>
 * Order in the returned collection *is* significant: nodes containing more data go
 * first, so the first nodes in the collection are considered preferable for scheduling.
 *
 * @param split Input split.
 * @return IGFS affinity or {@code null} if IGFS is not available.
 * @throws IgniteCheckedException If failed.
 */
@Nullable
private Collection<UUID> igfsAffinityNodesForSplit(HadoopInputSplit split) throws IgniteCheckedException {
    if (split instanceof HadoopFileBlock) {
        HadoopFileBlock split0 = (HadoopFileBlock) split;
        if (IgniteFileSystem.IGFS_SCHEME.equalsIgnoreCase(split0.file().getScheme())) {
            HadoopIgfsEndpoint endpoint = new HadoopIgfsEndpoint(split0.file().getAuthority());
            IgfsEx igfs = (IgfsEx) ((IgniteEx) ignite).igfsx(endpoint.igfs());
            if (igfs != null && !igfs.isProxy(split0.file())) {
                IgfsPath path = new IgfsPath(split0.file());
                if (igfs.exists(path)) {
                    Collection<IgfsBlockLocation> blocks;
                    try {
                        blocks = igfs.affinity(path, split0.start(), split0.length());
                    } catch (IgniteException e) {
                        throw new IgniteCheckedException("Failed to get IGFS file block affinity [path=" + path + ", start=" + split0.start() + ", len=" + split0.length() + ']', e);
                    }
                    assert blocks != null;
                    if (blocks.size() == 1)
                        return blocks.iterator().next().nodeIds();
                    else {
                        // The most "local" nodes go first.
                        Map<UUID, Long> idToLen = new HashMap<>();
                        for (IgfsBlockLocation block : blocks) {
                            for (UUID id : block.nodeIds()) {
                                Long len = idToLen.get(id);
                                idToLen.put(id, len == null ? block.length() : block.length() + len);
                            }
                        }
                        // Sort the nodes in non-ascending order by contained data lengths.
                        Map<NodeIdAndLength, UUID> res = new TreeMap<>();
                        for (Map.Entry<UUID, Long> idToLenEntry : idToLen.entrySet()) {
                            UUID id = idToLenEntry.getKey();
                            res.put(new NodeIdAndLength(id, idToLenEntry.getValue()), id);
                        }
                        return new LinkedHashSet<>(res.values());
                    }
                }
            }
        }
    }
    return null;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) HashMap(java.util.HashMap) IdentityHashMap(java.util.IdentityHashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) UUID(java.util.UUID) HadoopIgfsEndpoint(org.apache.ignite.internal.processors.hadoop.igfs.HadoopIgfsEndpoint) IgfsBlockLocation(org.apache.ignite.igfs.IgfsBlockLocation) HadoopFileBlock(org.apache.ignite.internal.processors.hadoop.HadoopFileBlock) IgfsPath(org.apache.ignite.igfs.IgfsPath) IgfsEx(org.apache.ignite.internal.processors.igfs.IgfsEx) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteException(org.apache.ignite.IgniteException) Nullable(org.jetbrains.annotations.Nullable)
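
The descending order produced above relies on NodeIdAndLength, whose implementation is not shown in this example. A minimal sketch of an equivalent comparable key, assuming its only job is to sort by contained data length (largest first) with the node id as a tie-breaker:

import java.util.UUID;

// Hypothetical stand-in for the planner's NodeIdAndLength key (assumed semantics:
// order by contained data length, largest first, then by node id for stability).
final class NodeIdAndLength implements Comparable<NodeIdAndLength> {
    private final UUID id;
    private final long len;

    NodeIdAndLength(UUID id, long len) {
        this.id = id;
        this.len = len;
    }

    @Override public int compareTo(NodeIdAndLength other) {
        // Descending by length, so a TreeMap keyed by this class iterates the most "local" nodes first.
        int cmp = Long.compare(other.len, len);

        return cmp != 0 ? cmp : id.compareTo(other.id);
    }
}

Since TreeMap orders keys via compareTo() alone, this is all the sort needs; the LinkedHashSet built from res.values() then preserves that most-local-first iteration order.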

Example 7 with HadoopFileBlock

use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.

the class HadoopTasksAllVersionsTest method testAllTasks.

/**
 * Tests the whole job end to end: runs two chains of map-combine tasks and feeds
 * their results into a single reduce task.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("ConstantConditions")
public void testAllTasks() throws Exception {
    IgfsPath inDir = new IgfsPath(PATH_INPUT);
    igfs.mkdirs(inDir);
    IgfsPath inFile = new IgfsPath(inDir, HadoopWordCount2.class.getSimpleName() + "-input");
    URI inFileUri = URI.create(igfsScheme() + inFile.toString());
    generateTestFile(inFile.toString(), "red", 100, "blue", 200, "green", 150, "yellow", 70);
    // Split file into two blocks
    long fileLen = igfs.info(inFile).length();
    long l = fileLen / 2;
    HadoopFileBlock fileBlock1 = new HadoopFileBlock(HOSTS, inFileUri, 0, l);
    HadoopFileBlock fileBlock2 = new HadoopFileBlock(HOSTS, inFileUri, l, fileLen - l);
    HadoopJobEx gridJob = getHadoopJob(inFileUri.toString(), igfsScheme() + PATH_OUTPUT);
    HadoopTestTaskContext combine1Ctx = runMapCombineTask(fileBlock1, gridJob);
    HadoopTestTaskContext combine2Ctx = runMapCombineTask(fileBlock2, gridJob);
    // Prepare the reduce task and feed it both combine outputs.
    HadoopTaskInfo taskInfo = new HadoopTaskInfo(HadoopTaskType.REDUCE, gridJob.id(), 0, 0, null);
    HadoopTestTaskContext reduceCtx = new HadoopTestTaskContext(taskInfo, gridJob);
    reduceCtx.makeTreeOfWritables(combine1Ctx.mockOutput());
    reduceCtx.makeTreeOfWritables(combine2Ctx.mockOutput());
    reduceCtx.run();
    reduceCtx.taskInfo(new HadoopTaskInfo(HadoopTaskType.COMMIT, gridJob.id(), 0, 0, null));
    reduceCtx.run();
    assertEquals("blue\t200\n" + "green\t150\n" + "red\t100\n" + "yellow\t70\n", readAndSortFile(PATH_OUTPUT + "/" + getOutputFileNamePrefix() + "00000"));
}
Also used : IgfsPath(org.apache.ignite.igfs.IgfsPath) HadoopJobEx(org.apache.ignite.internal.processors.hadoop.HadoopJobEx) HadoopTaskInfo(org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo) HadoopFileBlock(org.apache.ignite.internal.processors.hadoop.HadoopFileBlock) URI(java.net.URI)
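
The two-way split above generalizes to any number of blocks. A hedged sketch of a helper (hypothetical, not part of the test) that cuts a file of known length into contiguous HadoopFileBlock splits:

import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import org.apache.ignite.internal.processors.hadoop.HadoopFileBlock;

// Hypothetical helper: split a file of fileLen bytes into 'parts' contiguous
// HadoopFileBlock instances covering [0, fileLen) with no gaps or overlaps.
static List<HadoopFileBlock> splitFile(String[] hosts, URI file, long fileLen, int parts) {
    List<HadoopFileBlock> blocks = new ArrayList<>(parts);
    long start = 0;
    for (int i = 0; i < parts; i++) {
        // The last block absorbs the remainder so the block lengths sum to fileLen.
        long len = (i == parts - 1) ? fileLen - start : fileLen / parts;
        blocks.add(new HadoopFileBlock(hosts, file, start, len));
        start += len;
    }
    return blocks;
}

With parts = 2 this reproduces fileBlock1 and fileBlock2 from the test: [0, fileLen / 2) and [fileLen / 2, fileLen).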

Example 8 with HadoopFileBlock

use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.

the class HadoopTasksAllVersionsTest method testMapTask.

/**
 * Tests map task execution.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("ConstantConditions")
public void testMapTask() throws Exception {
    IgfsPath inDir = new IgfsPath(PATH_INPUT);
    igfs.mkdirs(inDir);
    IgfsPath inFile = new IgfsPath(inDir, HadoopWordCount2.class.getSimpleName() + "-input");
    URI inFileUri = URI.create(igfsScheme() + inFile.toString());
    try (PrintWriter pw = new PrintWriter(igfs.create(inFile, true))) {
        pw.println("hello0 world0");
        pw.println("world1 hello1");
    }
    HadoopFileBlock fileBlock1 = new HadoopFileBlock(HOSTS, inFileUri, 0, igfs.info(inFile).length() - 1);
    try (PrintWriter pw = new PrintWriter(igfs.append(inFile, false))) {
        pw.println("hello2 world2");
        pw.println("world3 hello3");
    }
    HadoopFileBlock fileBlock2 = new HadoopFileBlock(HOSTS, inFileUri, fileBlock1.length(), igfs.info(inFile).length() - fileBlock1.length());
    HadoopJobEx gridJob = getHadoopJob(igfsScheme() + inFile.toString(), igfsScheme() + PATH_OUTPUT);
    HadoopTaskInfo taskInfo = new HadoopTaskInfo(HadoopTaskType.MAP, gridJob.id(), 0, 0, fileBlock1);
    HadoopTestTaskContext ctx = new HadoopTestTaskContext(taskInfo, gridJob);
    ctx.mockOutput().clear();
    ctx.run();
    assertEquals("hello0,1; world0,1; world1,1; hello1,1", Joiner.on("; ").join(ctx.mockOutput()));
    ctx.mockOutput().clear();
    ctx.taskInfo(new HadoopTaskInfo(HadoopTaskType.MAP, gridJob.id(), 0, 0, fileBlock2));
    ctx.run();
    assertEquals("hello2,1; world2,1; world3,1; hello3,1", Joiner.on("; ").join(ctx.mockOutput()));
}
Also used : IgfsPath(org.apache.ignite.igfs.IgfsPath) HadoopJobEx(org.apache.ignite.internal.processors.hadoop.HadoopJobEx) HadoopTaskInfo(org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo) HadoopFileBlock(org.apache.ignite.internal.processors.hadoop.HadoopFileBlock) URI(java.net.URI) PrintWriter(java.io.PrintWriter)
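
Note that fileBlock1 deliberately stops one byte short of the initially written data, yet each map still emits whole lines. Line-based record readers assign every line to the split containing the line's first byte, reading past the split end to finish it if needed. A plain-Java sketch of that assignment rule (illustrative only; the class and method names are hypothetical):

import java.util.ArrayList;
import java.util.List;

// Hypothetical illustration of the rule used by Hadoop's line record readers:
// a line belongs to the split that contains the line's first byte.
final class SplitLines {
    static List<String> linesForSplit(String data, long start, long len) {
        List<String> res = new ArrayList<>();
        long pos = 0;
        for (String line : data.split("\n", -1)) {
            if (!line.isEmpty() && pos >= start && pos < start + len)
                res.add(line);
            pos += line.length() + 1; // +1 for the consumed '\n'.
        }
        return res;
    }

    public static void main(String[] args) {
        String data = "hello0 world0\nworld1 hello1\nhello2 world2\nworld3 hello3\n";
        // Mirrors the test's blocks: the first stops one byte short of the initial write (28 - 1 = 27).
        System.out.println(linesForSplit(data, 0, 27));  // [hello0 world0, world1 hello1]
        System.out.println(linesForSplit(data, 27, 29)); // [hello2 world2, world3 hello3]
    }
}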

Example 9 with HadoopFileBlock

use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.

the class HadoopWeightedMapReducePlannerTest method testHdfsSplitsAffinity.

/**
 * Tests affinity of HDFS splits.
 *
 * @throws Exception If failed.
 */
public void testHdfsSplitsAffinity() throws Exception {
    IgfsMock igfs = LocationsBuilder.create().add(0, NODE_1).add(50, NODE_2).add(100, NODE_3).buildIgfs();
    final List<HadoopInputSplit> splits = new ArrayList<>();
    splits.add(new HadoopFileBlock(new String[] { HOST_1 }, URI.create("hdfs://" + HOST_1 + "/x"), 0, 50));
    splits.add(new HadoopFileBlock(new String[] { HOST_2 }, URI.create("hdfs://" + HOST_2 + "/x"), 50, 100));
    splits.add(new HadoopFileBlock(new String[] { HOST_3 }, URI.create("hdfs://" + HOST_3 + "/x"), 100, 37));
    // The following splits belong to hosts that are outside the Ignite topology entirely,
    // so they should be assigned to the least loaded nodes:
    splits.add(new HadoopFileBlock(new String[] { HOST_4 }, URI.create("hdfs://" + HOST_4 + "/x"), 138, 2));
    splits.add(new HadoopFileBlock(new String[] { HOST_5 }, URI.create("hdfs://" + HOST_5 + "/x"), 140, 3));
    final int expReducers = 7;
    HadoopPlannerMockJob job = new HadoopPlannerMockJob(splits, expReducers);
    IgniteHadoopWeightedMapReducePlanner planner = createPlanner(igfs);
    final HadoopMapReducePlan plan = planner.preparePlan(job, NODES, null);
    checkPlanMappers(plan, splits, NODES, true);
    checkPlanReducers(plan, NODES, expReducers, true);
}
Also used : HadoopMapReducePlan(org.apache.ignite.hadoop.HadoopMapReducePlan) IgniteHadoopWeightedMapReducePlanner(org.apache.ignite.hadoop.mapreduce.IgniteHadoopWeightedMapReducePlanner) ArrayList(java.util.ArrayList) IgfsMock(org.apache.ignite.internal.processors.igfs.IgfsMock) HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) HadoopFileBlock(org.apache.ignite.internal.processors.hadoop.HadoopFileBlock)

Example 10 with HadoopFileBlock

use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.

the class HadoopV2Context method getInputSplit.

/**
 * {@inheritDoc}
 */
@Override
public InputSplit getInputSplit() {
    if (inputSplit == null) {
        HadoopInputSplit split = ctx.taskInfo().inputSplit();
        if (split == null)
            return null;
        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock fileBlock = (HadoopFileBlock) split;
            inputSplit = new FileSplit(new Path(fileBlock.file()), fileBlock.start(), fileBlock.length(), null);
        } else {
            try {
                inputSplit = (InputSplit) ((HadoopV2TaskContext) ctx).getNativeSplit(split);
            } catch (IgniteCheckedException e) {
                throw new IllegalStateException(e);
            }
        }
    }
    return inputSplit;
}
Also used : Path(org.apache.hadoop.fs.Path) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) HadoopFileBlock(org.apache.ignite.internal.processors.hadoop.HadoopFileBlock) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit)
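
On the consuming side, Hadoop user code sees the converted split through the standard MapReduce API. A minimal sketch of a mapper reading the file coordinates back out of the FileSplit (the mapper class itself is illustrative; the FileSplit accessors are standard Hadoop API):

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Illustrative mapper: inspects the split that HadoopV2Context.getInputSplit()
// produced from the HadoopFileBlock (file URI, start offset, and length).
public class SplitAwareMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override protected void setup(Context ctx) throws IOException, InterruptedException {
        FileSplit split = (FileSplit) ctx.getInputSplit();

        Path file = split.getPath();   // Same URI the HadoopFileBlock carried.
        long start = split.getStart(); // fileBlock.start()
        long len = split.getLength();  // fileBlock.length()
    }
}

Passing null as the hosts argument when the FileSplit is built above is harmless at this stage: split placement has already been decided by the planner, so the hosts array is no longer consulted for scheduling.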

Aggregations

HadoopFileBlock (org.apache.ignite.internal.processors.hadoop.HadoopFileBlock): 12
HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit): 8
ArrayList (java.util.ArrayList): 6
IgniteCheckedException (org.apache.ignite.IgniteCheckedException): 6
Path (org.apache.hadoop.fs.Path): 3
HadoopMapReducePlan (org.apache.ignite.hadoop.HadoopMapReducePlan): 3
IgniteHadoopWeightedMapReducePlanner (org.apache.ignite.hadoop.mapreduce.IgniteHadoopWeightedMapReducePlanner): 3
IgfsPath (org.apache.ignite.igfs.IgfsPath): 3
HadoopJobEx (org.apache.ignite.internal.processors.hadoop.HadoopJobEx): 3
IgfsMock (org.apache.ignite.internal.processors.igfs.IgfsMock): 3
IOException (java.io.IOException): 2
URI (java.net.URI): 2
LinkedHashSet (java.util.LinkedHashSet): 2
TreeMap (java.util.TreeMap): 2
UUID (java.util.UUID): 2
FileSplit (org.apache.hadoop.mapred.FileSplit): 2
InputSplit (org.apache.hadoop.mapred.InputSplit): 2
FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit): 2
IgniteException (org.apache.ignite.IgniteException): 2
HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo): 2