Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class IgniteHadoopWeightedMapReducePlanner, method assignReducersToSplits.
/**
 * Distribute reducers among splits.
 *
 * @param splits Splits.
 * @param reducerCnt Reducer count.
 * @return Map from input split to reducer count.
 */
private Map<HadoopInputSplit, Integer> assignReducersToSplits(Collection<HadoopInputSplit> splits, int reducerCnt) {
    Map<HadoopInputSplit, Integer> res = new IdentityHashMap<>(splits.size());

    int base = reducerCnt / splits.size();
    int remainder = reducerCnt % splits.size();

    for (HadoopInputSplit split : splits) {
        int val = base;

        // The first 'remainder' splits each receive one extra reducer.
        if (remainder > 0) {
            val++;

            remainder--;
        }

        res.put(split, val);
    }

    assert remainder == 0;

    return res;
}
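The base-plus-remainder arithmetic above is easy to verify in isolation. A minimal, self-contained sketch (the counts and class name are hypothetical, not part of the planner):

// Hypothetical illustration of the same distribution:
// 10 reducers over 4 splits yields 3, 3, 2, 2.
public class ReducerDistributionSketch {
    public static void main(String[] args) {
        int reducerCnt = 10;
        int splitCnt = 4;

        int base = reducerCnt / splitCnt;       // 2 reducers for every split...
        int remainder = reducerCnt % splitCnt;  // ...plus 1 extra for the first 2 splits.

        for (int i = 0; i < splitCnt; i++)
            System.out.println("split " + i + " -> " + (base + (i < remainder ? 1 : 0)));
    }
}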
Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class IgniteHadoopWeightedMapReducePlanner, method assignReducers0.
/**
 * Generate reducers.
 *
 * @param top Topology.
 * @param splits Input splits.
 * @param mappers Mappers.
 * @param reducerCnt Reducer count.
 * @return Reducers.
 */
private Map<UUID, Integer> assignReducers0(HadoopMapReducePlanTopology top, Collection<HadoopInputSplit> splits,
    Mappers mappers, int reducerCnt) {
    Map<UUID, Integer> res = new HashMap<>();

    // Assign reducers to splits.
    Map<HadoopInputSplit, Integer> splitToReducerCnt = assignReducersToSplits(splits, reducerCnt);

    // Assign as many local reducers as possible.
    int remaining = 0;

    for (Map.Entry<HadoopInputSplit, Integer> entry : splitToReducerCnt.entrySet()) {
        HadoopInputSplit split = entry.getKey();
        int cnt = entry.getValue();

        if (cnt > 0) {
            int assigned = assignLocalReducers(split, cnt, top, mappers, res);

            assert assigned <= cnt;

            remaining += cnt - assigned;
        }
    }

    // Assign the remaining reducers to remote nodes.
    if (remaining > 0)
        assignRemoteReducers(remaining, top, mappers, res);

    return res;
}
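The two-phase shape here (place locally up to capacity, pool the overflow, spill it remotely in one pass) is worth seeing stripped of the planner types. A hypothetical sketch under assumed names and capacities; this is not the planner's actual API:

import java.util.LinkedHashMap;
import java.util.Map;

// Hypothetical two-phase assignment: each "split" first places reducers on its
// local node up to an assumed per-node capacity; the overflow is pooled and
// handed to a single remote-assignment step at the end.
public class TwoPhaseAssignmentSketch {
    public static void main(String[] args) {
        Map<String, Integer> wanted = new LinkedHashMap<>();
        wanted.put("splitA", 3);
        wanted.put("splitB", 3);

        int localCapacityPerNode = 2; // assumed limit
        int remaining = 0;

        for (Map.Entry<String, Integer> e : wanted.entrySet()) {
            int assigned = Math.min(e.getValue(), localCapacityPerNode);

            remaining += e.getValue() - assigned;

            System.out.println(e.getKey() + ": " + assigned + " local");
        }

        System.out.println(remaining + " reducer(s) left for remote nodes");
    }
}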
Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class HadoopTestRoundRobinMrPlanner, method preparePlan.
/** {@inheritDoc} */
@Override public HadoopMapReducePlan preparePlan(HadoopJob job, Collection<ClusterNode> top,
    @Nullable HadoopMapReducePlan oldPlan) throws IgniteCheckedException {
    if (top.isEmpty())
        throw new IllegalArgumentException("Topology is empty");

    // Has at least one element.
    Iterator<ClusterNode> it = top.iterator();

    Map<UUID, Collection<HadoopInputSplit>> mappers = new HashMap<>();

    for (HadoopInputSplit block : job.input()) {
        ClusterNode node = it.next();

        Collection<HadoopInputSplit> nodeBlocks = mappers.get(node.id());

        if (nodeBlocks == null) {
            nodeBlocks = new ArrayList<>();

            mappers.put(node.id(), nodeBlocks);
        }

        nodeBlocks.add(block);

        // Wrap around to the first node once the topology iterator is exhausted.
        if (!it.hasNext())
            it = top.iterator();
    }

    int[] rdc = new int[job.reducers()];

    for (int i = 0; i < rdc.length; i++)
        rdc[i] = i;

    return new HadoopDefaultMapReducePlan(mappers, Collections.singletonMap(it.next().id(), rdc));
}
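A quick, hypothetical illustration of the round-robin walk, with plain strings standing in for cluster nodes and integers for splits:

import java.util.*;

// Hypothetical round-robin distribution: 5 splits over 2 nodes land as A,B,A,B,A.
public class RoundRobinSketch {
    public static void main(String[] args) {
        List<String> nodes = Arrays.asList("nodeA", "nodeB");
        Map<String, List<Integer>> mappers = new LinkedHashMap<>();

        Iterator<String> it = nodes.iterator();

        for (int split = 0; split < 5; split++) {
            String node = it.next();

            mappers.computeIfAbsent(node, k -> new ArrayList<>()).add(split);

            if (!it.hasNext())
                it = nodes.iterator(); // wrap around, as in preparePlan() above
        }

        System.out.println(mappers); // {nodeA=[0, 2, 4], nodeB=[1, 3]}
    }
}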
Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class HadoopV2Job, method input.
/** {@inheritDoc} */
@Override public Collection<HadoopInputSplit> input() {
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // Assume that we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try {
            FileSystem fs = fileSystem(jobDir.toUri(), jobConf);

            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    // Try the v1 format first, then v2; fall back to an opaque external split.
                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            if (e instanceof Error)
                throw (Error)e;
            else
                throw transformException(e);
        }
    }
    catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
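The set/restore dance around the job's class loader is the load-bearing part of this method: split classes must resolve against the job's loader, and the previous loader must come back even on failure. A minimal, generic sketch of the same pattern; the helper name and Callable body are placeholders, not Ignite API:

import java.util.concurrent.Callable;

// Hypothetical sketch of the set/restore context-class-loader pattern used above.
public class ContextClassLoaderSketch {
    static <T> T withClassLoader(ClassLoader ldr, Callable<T> body) throws Exception {
        Thread t = Thread.currentThread();
        ClassLoader old = t.getContextClassLoader();

        t.setContextClassLoader(ldr);

        try {
            return body.call(); // user code may resolve classes via the context loader
        }
        finally {
            t.setContextClassLoader(old); // always restore, even if body throws
        }
    }

    public static void main(String[] args) throws Exception {
        String s = withClassLoader(ContextClassLoaderSketch.class.getClassLoader(),
            () -> "ran with a specific context class loader");

        System.out.println(s);
    }
}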
Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class HadoopV2Splitter, method splitJob.
/**
 * @param ctx Job context.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(), ctx.getConfiguration());

        assert format != null;

        List<InputSplit> splits = format.getSplits(ctx);

        Collection<HadoopInputSplit> res = new ArrayList<>(splits.size());

        int id = 0;

        for (InputSplit nativeSplit : splits) {
            // File splits map directly to HadoopFileBlock; any other split type is wrapped as-is.
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit)nativeSplit;

                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            }
            else
                res.add(HadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

            id++;
        }

        return res;
    }
    catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
    catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}
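Note the InterruptedException branch: the interrupt flag is re-set before the exception is translated, so callers further up the stack can still observe the interrupt. A standalone sketch of that convention; blockingCall() and the wrapping exception type are stand-ins, not the Ignite code:

// Hypothetical sketch: restore the interrupt flag before wrapping InterruptedException.
public class InterruptHandlingSketch {
    static void blockingCall() throws InterruptedException {
        Thread.sleep(10); // stand-in for any interruptible operation
    }

    public static void main(String[] args) {
        try {
            blockingCall();
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // preserve interrupt status for callers

            throw new RuntimeException(e);      // then translate to the caller-facing type
        }
    }
}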