Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class IgniteHadoopWeightedMapReducePlanner, method assignReducersToSplits.
/**
 * Distribute reducers among splits.
 *
 * @param splits Splits.
 * @param reducerCnt Reducer count.
 * @return Map from input split to reducer count.
 */
private Map<HadoopInputSplit, Integer> assignReducersToSplits(Collection<HadoopInputSplit> splits, int reducerCnt) {
    Map<HadoopInputSplit, Integer> res = new IdentityHashMap<>(splits.size());

    int base = reducerCnt / splits.size();
    int remainder = reducerCnt % splits.size();

    for (HadoopInputSplit split : splits) {
        int val = base;

        // The first 'remainder' splits each receive one extra reducer.
        if (remainder > 0) {
            val++;

            remainder--;
        }

        res.put(split, val);
    }

    assert remainder == 0;

    return res;
}
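The base-plus-remainder arithmetic above is easy to verify in isolation. A minimal, self-contained sketch (the counts and class name are hypothetical, not part of the planner):

// Hypothetical illustration of the same distribution:
// 10 reducers over 4 splits yields 3, 3, 2, 2.
public class ReducerDistributionSketch {
    public static void main(String[] args) {
        int reducerCnt = 10;
        int splitCnt = 4;

        int base = reducerCnt / splitCnt;       // 2 reducers for every split...
        int remainder = reducerCnt % splitCnt;  // ...plus 1 extra for the first 2 splits.

        for (int i = 0; i < splitCnt; i++)
            System.out.println("split " + i + " -> " + (base + (i < remainder ? 1 : 0)));
    }
}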
Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class IgniteHadoopWeightedMapReducePlanner, method assignReducers0.
/**
 * Generate reducers.
 *
 * @param top Topology.
 * @param splits Input splits.
 * @param mappers Mappers.
 * @param reducerCnt Reducer count.
 * @return Reducers.
 */
private Map<UUID, Integer> assignReducers0(HadoopMapReducePlanTopology top, Collection<HadoopInputSplit> splits,
    Mappers mappers, int reducerCnt) {
    Map<UUID, Integer> res = new HashMap<>();

    // Assign reducers to splits.
    Map<HadoopInputSplit, Integer> splitToReducerCnt = assignReducersToSplits(splits, reducerCnt);

    // Assign as many local reducers as possible.
    int remaining = 0;

    for (Map.Entry<HadoopInputSplit, Integer> entry : splitToReducerCnt.entrySet()) {
        HadoopInputSplit split = entry.getKey();
        int cnt = entry.getValue();

        if (cnt > 0) {
            int assigned = assignLocalReducers(split, cnt, top, mappers, res);

            assert assigned <= cnt;

            remaining += cnt - assigned;
        }
    }

    // Assign the remaining reducers to remote nodes.
    if (remaining > 0)
        assignRemoteReducers(remaining, top, mappers, res);

    return res;
}
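The two-phase shape here (place locally up to capacity, pool the overflow, spill it remotely in one pass) is worth seeing stripped of the planner types. A hypothetical sketch under assumed names and capacities; this is not the planner's actual API:

import java.util.LinkedHashMap;
import java.util.Map;

// Hypothetical two-phase assignment: each "split" first places reducers on its
// local node up to an assumed per-node capacity; the overflow is pooled and
// handed to a single remote-assignment step at the end.
public class TwoPhaseAssignmentSketch {
    public static void main(String[] args) {
        Map<String, Integer> wanted = new LinkedHashMap<>();
        wanted.put("splitA", 3);
        wanted.put("splitB", 3);

        int localCapacityPerNode = 2; // assumed limit
        int remaining = 0;

        for (Map.Entry<String, Integer> e : wanted.entrySet()) {
            int assigned = Math.min(e.getValue(), localCapacityPerNode);

            remaining += e.getValue() - assigned;

            System.out.println(e.getKey() + ": " + assigned + " local");
        }

        System.out.println(remaining + " reducer(s) left for remote nodes");
    }
}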
Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class HadoopTestRoundRobinMrPlanner, method preparePlan.
/** {@inheritDoc} */
@Override public HadoopMapReducePlan preparePlan(HadoopJob job, Collection<ClusterNode> top,
    @Nullable HadoopMapReducePlan oldPlan) throws IgniteCheckedException {
    if (top.isEmpty())
        throw new IllegalArgumentException("Topology is empty");

    // Has at least one element.
    Iterator<ClusterNode> it = top.iterator();

    Map<UUID, Collection<HadoopInputSplit>> mappers = new HashMap<>();

    for (HadoopInputSplit block : job.input()) {
        ClusterNode node = it.next();

        Collection<HadoopInputSplit> nodeBlocks = mappers.get(node.id());

        if (nodeBlocks == null) {
            nodeBlocks = new ArrayList<>();

            mappers.put(node.id(), nodeBlocks);
        }

        nodeBlocks.add(block);

        // Wrap around to the first node once the topology iterator is exhausted.
        if (!it.hasNext())
            it = top.iterator();
    }

    int[] rdc = new int[job.reducers()];

    for (int i = 0; i < rdc.length; i++)
        rdc[i] = i;

    return new HadoopDefaultMapReducePlan(mappers, Collections.singletonMap(it.next().id(), rdc));
}
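A quick, hypothetical illustration of the round-robin walk, with plain strings standing in for cluster nodes and integers for splits:

import java.util.*;

// Hypothetical round-robin distribution: 5 splits over 2 nodes land as A,B,A,B,A.
public class RoundRobinSketch {
    public static void main(String[] args) {
        List<String> nodes = Arrays.asList("nodeA", "nodeB");
        Map<String, List<Integer>> mappers = new LinkedHashMap<>();

        Iterator<String> it = nodes.iterator();

        for (int split = 0; split < 5; split++) {
            String node = it.next();

            mappers.computeIfAbsent(node, k -> new ArrayList<>()).add(split);

            if (!it.hasNext())
                it = nodes.iterator(); // wrap around, as in preparePlan() above
        }

        System.out.println(mappers); // {nodeA=[0, 2, 4], nodeB=[1, 3]}
    }
}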
Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class HadoopV2Job, method input.
/** {@inheritDoc} */
@Override public Collection<HadoopInputSplit> input() {
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // Assume that we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try {
            FileSystem fs = fileSystem(jobDir.toUri(), jobConf);

            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    // Try the v1 format first, then v2; fall back to an opaque external split.
                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            if (e instanceof Error)
                throw (Error)e;
            else
                throw transformException(e);
        }
    }
    catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
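The set/restore dance around the job's class loader is the load-bearing part of this method: split classes must resolve against the job's loader, and the previous loader must come back even on failure. A minimal, generic sketch of the same pattern; the helper name and Callable body are placeholders, not Ignite API:

import java.util.concurrent.Callable;

// Hypothetical sketch of the set/restore context-class-loader pattern used above.
public class ContextClassLoaderSketch {
    static <T> T withClassLoader(ClassLoader ldr, Callable<T> body) throws Exception {
        Thread t = Thread.currentThread();
        ClassLoader old = t.getContextClassLoader();

        t.setContextClassLoader(ldr);

        try {
            return body.call(); // user code may resolve classes via the context loader
        }
        finally {
            t.setContextClassLoader(old); // always restore, even if body throws
        }
    }

    public static void main(String[] args) throws Exception {
        String s = withClassLoader(ContextClassLoaderSketch.class.getClassLoader(),
            () -> "ran with a specific context class loader");

        System.out.println(s);
    }
}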
Use of org.apache.ignite.hadoop.HadoopInputSplit in project ignite by apache.
The class HadoopV2Splitter, method splitJob.
/**
 * @param ctx Job context.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(), ctx.getConfiguration());

        assert format != null;

        List<InputSplit> splits = format.getSplits(ctx);

        Collection<HadoopInputSplit> res = new ArrayList<>(splits.size());

        int id = 0;

        for (InputSplit nativeSplit : splits) {
            // File splits map directly to HadoopFileBlock; any other split type is wrapped as-is.
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit)nativeSplit;

                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            }
            else
                res.add(HadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

            id++;
        }

        return res;
    }
    catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
    catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}
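Note the InterruptedException branch: the interrupt flag is re-set before the exception is translated, so callers further up the stack can still observe the interrupt. A standalone sketch of that convention; blockingCall() and the wrapping exception type are stand-ins, not the Ignite code:

// Hypothetical sketch: restore the interrupt flag before wrapping InterruptedException.
public class InterruptHandlingSketch {
    static void blockingCall() throws InterruptedException {
        Thread.sleep(10); // stand-in for any interruptible operation
    }

    public static void main(String[] args) {
        try {
            blockingCall();
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // preserve interrupt status for callers

            throw new RuntimeException(e);      // then translate to the caller-facing type
        }
    }
}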