use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
the class HadoopWeightedMapReducePlannerTest method testHdfsSplitsAffinity.
/**
* Test HDFS splits affinity.
*
* @throws Exception If failed.
*/
public void testHdfsSplitsAffinity() throws Exception {
    IgfsMock igfs = LocationsBuilder.create().add(0, NODE_1).add(50, NODE_2).add(100, NODE_3).buildIgfs();

    final List<HadoopInputSplit> splits = new ArrayList<>();

    splits.add(new HadoopFileBlock(new String[] { HOST_1 }, URI.create("hdfs://" + HOST_1 + "/x"), 0, 50));
    splits.add(new HadoopFileBlock(new String[] { HOST_2 }, URI.create("hdfs://" + HOST_2 + "/x"), 50, 100));
    splits.add(new HadoopFileBlock(new String[] { HOST_3 }, URI.create("hdfs://" + HOST_3 + "/x"), 100, 37));

    // The following splits belong to hosts that are not part of the Ignite topology at all.
    // This means that these splits should be assigned to the least loaded nodes:
    splits.add(new HadoopFileBlock(new String[] { HOST_4 }, URI.create("hdfs://" + HOST_4 + "/x"), 138, 2));
    splits.add(new HadoopFileBlock(new String[] { HOST_5 }, URI.create("hdfs://" + HOST_5 + "/x"), 140, 3));

    final int expReducers = 7;

    HadoopPlannerMockJob job = new HadoopPlannerMockJob(splits, expReducers);

    IgniteHadoopWeightedMapReducePlanner planner = createPlanner(igfs);

    final HadoopMapReducePlan plan = planner.preparePlan(job, NODES, null);

    checkPlanMappers(plan, splits, NODES, true);
    checkPlanReducers(plan, NODES, expReducers, true);
}
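The test relies only on the HadoopMapReducePlan accessors that also appear elsewhere on this page: mappers(), mapperNodeIds() and mappers(UUID). As a minimal sketch of how a produced plan can be inspected by hand, a hypothetical helper (not part of the test above) could look like this:

// Hypothetical debugging helper: dumps mapper assignments of a plan.
private static void printPlan(HadoopMapReducePlan plan) {
    System.out.println("Total mappers: " + plan.mappers());

    for (UUID nodeId : plan.mapperNodeIds()) {
        Collection<HadoopInputSplit> assigned = plan.mappers(nodeId);

        // For ids returned by mapperNodeIds() this collection should be non-null
        // (checkPlanMappers below asserts exactly that).
        System.out.println("Node " + nodeId + " -> " + (assigned == null ? 0 : assigned.size()) + " split(s)");
    }
}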
use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
the class HadoopWeightedMapReducePlannerTest method checkPlanMappers.
/**
* Check mappers for the plan.
*
* @param plan Plan.
* @param splits Splits.
* @param nodes Nodes.
* @param expectUniformity Whether uniformity is expected.
*/
private static void checkPlanMappers(HadoopMapReducePlan plan, List<HadoopInputSplit> splits,
    Collection<ClusterNode> nodes, boolean expectUniformity) {
    // Number of mappers should correspond to the number of input splits:
    assertEquals(splits.size(), plan.mappers());

    if (expectUniformity) {
        // Mappers are assigned to all available nodes:
        assertEquals(nodes.size(), plan.mapperNodeIds().size());
        assertEquals(allIds(nodes), plan.mapperNodeIds());
    }

    // Check that all splits are covered by mappers:
    Set<HadoopInputSplit> set = new HashSet<>();

    for (UUID id : plan.mapperNodeIds()) {
        Collection<HadoopInputSplit> sp = plan.mappers(id);

        assert sp != null;

        for (HadoopInputSplit s : sp)
            assertTrue(set.add(s));
    }

    // Must be of the same size and contain the same elements:
    assertEquals(set, new HashSet<>(splits));
}
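The allIds(...) helper referenced above is not shown on this page. A plausible shape, sketched here as an assumption, simply collects each node's UUID into a set so it can be compared against plan.mapperNodeIds():

// Assumed shape of allIds(...) (not shown in the snippets on this page).
private static Set<UUID> allIds(Collection<ClusterNode> nodes) {
    Set<UUID> ids = new HashSet<>();

    for (ClusterNode node : nodes)
        ids.add(node.id()); // ClusterNode.id() is the node's unique UUID.

    return ids;
}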
use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
the class HadoopCommonUtils method sortInputSplits.
/**
* Sort input splits by length.
*
* @param splits Splits.
* @return Sorted splits.
*/
public static List<HadoopInputSplit> sortInputSplits(Collection<HadoopInputSplit> splits) {
    int id = 0;

    TreeSet<SplitSortWrapper> sortedSplits = new TreeSet<>();

    for (HadoopInputSplit split : splits) {
        long len = split instanceof HadoopFileBlock ? ((HadoopFileBlock)split).length() : 0;

        sortedSplits.add(new SplitSortWrapper(id++, split, len));
    }

    ArrayList<HadoopInputSplit> res = new ArrayList<>(sortedSplits.size());

    for (SplitSortWrapper sortedSplit : sortedSplits)
        res.add(sortedSplit.split);

    return res;
}
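SplitSortWrapper is not shown on this page either. For the TreeSet above to order the splits, it must implement Comparable; the sketch below is an assumed shape that sorts by descending length, so the heaviest splits come first, and falls back to the insertion id so that two splits of equal length are not collapsed by the set:

// Assumed shape of SplitSortWrapper (not shown above): descending length, id as tie-breaker.
private static class SplitSortWrapper implements Comparable<SplitSortWrapper> {
    private final int id;
    private final HadoopInputSplit split;
    private final long len;

    SplitSortWrapper(int id, HadoopInputSplit split, long len) {
        this.id = id;
        this.split = split;
        this.len = len;
    }

    @Override public int compareTo(SplitSortWrapper other) {
        int res = Long.compare(other.len, len); // Bigger splits first.

        return res != 0 ? res : Integer.compare(id, other.id); // Keep equal-length splits distinct.
    }
}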
use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
the class HadoopJobTracker method allSplits.
/**
* Gets all input splits for given hadoop map-reduce plan.
*
* @param plan Map-reduce plan.
* @return Collection of all input splits that should be processed.
*/
@SuppressWarnings("ConstantConditions")
private Map<HadoopInputSplit, Integer> allSplits(HadoopMapReducePlan plan) {
    Map<HadoopInputSplit, Integer> res = new HashMap<>();

    int taskNum = 0;

    for (UUID nodeId : plan.mapperNodeIds()) {
        for (HadoopInputSplit split : plan.mappers(nodeId)) {
            if (res.put(split, taskNum++) != null)
                throw new IllegalStateException("Split duplicate.");
        }
    }

    return res;
}
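The map produced here gives every split a stable, unique task number, with the duplicate check guarding against a plan that assigns the same split to two nodes. A hypothetical usage fragment (plan and split are assumed to be in scope):

// Hypothetical usage: resolve the task number the plan assigned to a given split.
Map<HadoopInputSplit, Integer> splitToTask = allSplits(plan);

Integer taskNum = splitToTask.get(split);

if (taskNum == null)
    throw new IllegalStateException("Split is not part of the plan: " + split);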
use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
the class HadoopV2Context method getInputSplit.
/** {@inheritDoc} */
@Override public InputSplit getInputSplit() {
    if (inputSplit == null) {
        HadoopInputSplit split = ctx.taskInfo().inputSplit();

        if (split == null)
            return null;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock fileBlock = (HadoopFileBlock)split;

            inputSplit = new FileSplit(new Path(fileBlock.file()), fileBlock.start(), fileBlock.length(), null);
        }
        else {
            try {
                inputSplit = (InputSplit)((HadoopV2TaskContext)ctx).getNativeSplit(split);
            }
            catch (IgniteCheckedException e) {
                throw new IllegalStateException(e);
            }
        }
    }

    return inputSplit;
}
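For HDFS-backed input the caller gets back a standard org.apache.hadoop.mapreduce.lib.input.FileSplit, so the usual Hadoop accessors apply. A minimal sketch, assuming hadoopCtx is an instance of the context class above:

InputSplit split = hadoopCtx.getInputSplit(); // May be null if the task has no input split.

if (split instanceof FileSplit) {
    FileSplit fs = (FileSplit)split;

    // Standard Hadoop accessors: the file and the byte range this split covers.
    Path file = fs.getPath();
    long start = fs.getStart();
    long len = fs.getLength();
}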