Search in sources :

Example 16 with HadoopInputSplit

Use of org.apache.ignite.hadoop.HadoopInputSplit in the Apache Ignite project.

The method run of class HadoopV1MapTask.

/**
 * {@inheritDoc}
 *
 * <p>Runs the map phase of a Hadoop v1 task: resolves the native input split,
 * instantiates the user's {@code Mapper} via reflection, feeds every record from the
 * {@code RecordReader} into it, and commits the collector's output on success or
 * aborts it on failure.
 */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();
    // v1 task runs on top of the v2 context; cast gives access to jobConf()/getNativeSplit().
    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;
    // Publish this task's mapper index (thread-local) when available; otherwise clear
    // any stale value left by a previous task on this thread.
    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();
    try {
        JobConf jobConf = taskCtx0.jobConf();
        InputFormat inFormat = jobConf.getInputFormat();
        HadoopInputSplit split = info().inputSplit();
        InputSplit nativeSplit;
        // A HadoopFileBlock can be converted to a Hadoop FileSplit directly;
        // any other split type must be deserialized through the task context.
        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock block = (HadoopFileBlock) split;
            nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), EMPTY_HOSTS);
        } else
            nativeSplit = (InputSplit) taskCtx0.getNativeSplit(split);
        assert nativeSplit != null;
        Reporter reporter = new HadoopV1Reporter(taskCtx, nativeSplit);
        HadoopV1OutputCollector collector = null;
        try {
            // Direct writes go to the final output only when no combiner/reducer follows.
            collector = collector(jobConf, taskCtx0, !job.info().hasCombiner() && !job.info().hasReducer(), fileName(), taskCtx0.attemptId());
            RecordReader reader = inFormat.getRecordReader(nativeSplit, jobConf, reporter);
            Mapper mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);
            Object key = reader.createKey();
            Object val = reader.createValue();
            assert mapper != null;
            try {
                try {
                    // Main map loop; cancellation is checked once per record.
                    while (reader.next(key, val)) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Map task cancelled.");
                        mapper.map(key, val, collector, reporter);
                    }
                    taskCtx.onMapperFinished();
                } finally {
                    // Close the user mapper before the writer so its close() can still emit records.
                    mapper.close();
                }
            } finally {
                collector.closeWriter();
            }
            collector.commit();
        } catch (Exception e) {
            // Roll back partial output; collector is null only if collector(...) itself threw.
            if (collector != null)
                collector.abort();
            throw new IgniteCheckedException(e);
        }
    } finally {
        // Always clear the thread-local mapper index so it cannot leak to the next task.
        HadoopMapperUtils.clearMapperIndex();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Reporter(org.apache.hadoop.mapred.Reporter) RecordReader(org.apache.hadoop.mapred.RecordReader) HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) HadoopFileBlock(org.apache.ignite.internal.processors.hadoop.HadoopFileBlock) FileSplit(org.apache.hadoop.mapred.FileSplit) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) HadoopTaskCancelledException(org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException) Mapper(org.apache.hadoop.mapred.Mapper) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) HadoopJobEx(org.apache.ignite.internal.processors.hadoop.HadoopJobEx) InputFormat(org.apache.hadoop.mapred.InputFormat) HadoopTaskCancelledException(org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException) JobConf(org.apache.hadoop.mapred.JobConf) HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) InputSplit(org.apache.hadoop.mapred.InputSplit) HadoopV2TaskContext(org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext)

Example 17 with HadoopInputSplit

Use of org.apache.ignite.hadoop.HadoopInputSplit in the Apache Ignite project.

The method assignMappers of class IgniteHadoopWeightedMapReducePlanner.

/**
 * Assign each input split to the most suitable mapper node.
 *
 * @param splits Input splits.
 * @param top Topology.
 * @return Mappers.
 * @throws IgniteCheckedException If failed.
 */
private Mappers assignMappers(Collection<HadoopInputSplit> splits, HadoopMapReducePlanTopology top) throws IgniteCheckedException {
    Mappers mappers = new Mappers();

    for (HadoopInputSplit s : splits) {
        // Candidate nodes derived from IGFS affinity for this split.
        Collection<UUID> candidates = affinityNodesForSplit(s, top);

        // Choose the most suitable node among the candidates.
        UUID best = bestMapperNode(candidates, top);

        assert best != null;

        mappers.add(s, best);
    }

    return mappers;
}
Also used : HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) UUID(java.util.UUID)

Example 18 with HadoopInputSplit

Use of org.apache.ignite.hadoop.HadoopInputSplit in the Apache Ignite project.

The method preparePlan of class IgniteHadoopWeightedMapReducePlanner.

/**
 * {@inheritDoc}
 *
 * <p>Builds a map-reduce plan: sorts the job's input splits, validates the reducer
 * count, assigns mappers and reducers over the cluster topology.
 */
@Override
public HadoopMapReducePlan preparePlan(HadoopJob job, Collection<ClusterNode> nodes, @Nullable HadoopMapReducePlan oldPlan) throws IgniteCheckedException {
    // Sort splits so the assignment order is deterministic.
    List<HadoopInputSplit> sortedSplits = HadoopCommonUtils.sortInputSplits(job.input());

    int reducerCnt = job.reducers();

    if (reducerCnt < 0)
        throw new IgniteCheckedException("Number of reducers must be non-negative, actual: " + reducerCnt);

    HadoopMapReducePlanTopology top = topology(nodes);

    Mappers mappers = assignMappers(sortedSplits, top);

    Map<UUID, int[]> reducers = assignReducers(sortedSplits, top, mappers, reducerCnt);

    return new HadoopDefaultMapReducePlan(mappers.nodeToSplits, reducers);
}
Also used : HadoopDefaultMapReducePlan(org.apache.ignite.internal.processors.hadoop.planner.HadoopDefaultMapReducePlan) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) UUID(java.util.UUID) HadoopMapReducePlanTopology(org.apache.ignite.internal.processors.hadoop.planner.HadoopMapReducePlanTopology) HadoopIgfsEndpoint(org.apache.ignite.internal.processors.hadoop.igfs.HadoopIgfsEndpoint)

Example 19 with HadoopInputSplit

Use of org.apache.ignite.hadoop.HadoopInputSplit in the Apache Ignite project.

The method splitJob of class HadoopV1Splitter.

/**
 * Computes the job's input splits through the Hadoop v1 {@code InputFormat} and wraps
 * them into Ignite split descriptors.
 *
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobConf jobConf) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = jobConf.getInputFormat();

        assert format != null;

        InputSplit[] nativeSplits = format.getSplits(jobConf, 0);

        Collection<HadoopInputSplit> mapped = new ArrayList<>(nativeSplits.length);

        for (int idx = 0; idx < nativeSplits.length; idx++) {
            InputSplit nativeSplit = nativeSplits[idx];

            // File splits map directly onto Ignite file blocks; anything else is
            // kept as an opaque wrapped split identified by its index.
            if (nativeSplit instanceof FileSplit) {
                FileSplit fileSplit = (FileSplit) nativeSplit;

                mapped.add(new HadoopFileBlock(fileSplit.getLocations(), fileSplit.getPath().toUri(),
                    fileSplit.getStart(), fileSplit.getLength()));
            }
            else
                mapped.add(HadoopUtils.wrapSplit(idx, nativeSplit, nativeSplit.getLocations()));
        }

        return mapped;
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}
Also used : IgniteCheckedException(org.apache.ignite.IgniteCheckedException) ArrayList(java.util.ArrayList) HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) IOException(java.io.IOException) FileSplit(org.apache.hadoop.mapred.FileSplit) HadoopFileBlock(org.apache.ignite.internal.processors.hadoop.HadoopFileBlock) HadoopInputSplit(org.apache.ignite.hadoop.HadoopInputSplit) InputSplit(org.apache.hadoop.mapred.InputSplit)

Aggregations

HadoopInputSplit (org.apache.ignite.hadoop.HadoopInputSplit)19 ArrayList (java.util.ArrayList)8 UUID (java.util.UUID)8 HadoopFileBlock (org.apache.ignite.internal.processors.hadoop.HadoopFileBlock)8 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)7 HashMap (java.util.HashMap)4 HadoopMapReducePlan (org.apache.ignite.hadoop.HadoopMapReducePlan)4 Collection (java.util.Collection)3 Path (org.apache.hadoop.fs.Path)3 IgniteHadoopWeightedMapReducePlanner (org.apache.ignite.hadoop.mapreduce.IgniteHadoopWeightedMapReducePlanner)3 HadoopIgfsEndpoint (org.apache.ignite.internal.processors.hadoop.igfs.HadoopIgfsEndpoint)3 IgfsMock (org.apache.ignite.internal.processors.igfs.IgfsMock)3 IOException (java.io.IOException)2 IdentityHashMap (java.util.IdentityHashMap)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 FileSplit (org.apache.hadoop.mapred.FileSplit)2 InputSplit (org.apache.hadoop.mapred.InputSplit)2 FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit)2 HadoopClassLoader (org.apache.ignite.internal.processors.hadoop.HadoopClassLoader)2