Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class IgniteHadoopWeightedMapReducePlanner, method affinityNodesForSplit.
/**
 * Get affinity nodes for the given input split.
 * <p>
 * Order in the returned collection *is* significant: nodes containing more data go first,
 * so the first nodes in the collection are considered preferable for scheduling.
 *
 * @param split Split.
 * @param top Topology.
 * @return Affinity nodes.
 * @throws IgniteCheckedException If failed.
 */
private Collection<UUID> affinityNodesForSplit(HadoopInputSplit split, HadoopMapReducePlanTopology top)
    throws IgniteCheckedException {
    Collection<UUID> igfsNodeIds = igfsAffinityNodesForSplit(split);

    if (igfsNodeIds != null)
        return igfsNodeIds;

    Map<NodeIdAndLength, UUID> res = new TreeMap<>();

    for (String host : split.hosts()) {
        long len = split instanceof HadoopFileBlock ? ((HadoopFileBlock) split).length() : 0L;

        HadoopMapReducePlanGroup grp = top.groupForHost(host);

        if (grp != null) {
            for (int i = 0; i < grp.nodeCount(); i++) {
                UUID nodeId = grp.nodeId(i);

                res.put(new NodeIdAndLength(nodeId, len), nodeId);
            }
        }
    }

    return new LinkedHashSet<>(res.values());
}
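The ordering relies on NodeIdAndLength being a Comparable key that sorts larger data lengths first, so the TreeMap iterates the "most local" nodes before less local ones. A minimal sketch of such a key, assuming it only carries a node id and a byte length (the actual NodeIdAndLength class inside IgniteHadoopWeightedMapReducePlanner may be implemented differently), could look like this:

// Hypothetical sketch: larger lengths compare "smaller" so a TreeMap iterates
// nodes holding more data first. Not the actual Ignite implementation.
private static class NodeIdAndLength implements Comparable<NodeIdAndLength> {
    /** Node ID. */
    private final UUID id;

    /** Amount of split data on the node. */
    private final long len;

    NodeIdAndLength(UUID id, long len) {
        this.id = id;
        this.len = len;
    }

    @Override public int compareTo(NodeIdAndLength other) {
        int res = Long.compare(other.len, len); // Descending by length.

        return res != 0 ? res : id.compareTo(other.id); // Tie-break by node id.
    }

    @Override public boolean equals(Object obj) {
        return obj instanceof NodeIdAndLength && compareTo((NodeIdAndLength) obj) == 0;
    }

    @Override public int hashCode() {
        return id.hashCode();
    }
}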
Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class IgniteHadoopWeightedMapReducePlanner, method igfsAffinityNodesForSplit.
/**
 * Get IGFS affinity nodes for the split, if possible.
 * <p>
 * Order in the returned collection *is* significant: nodes containing more data go first,
 * so the first nodes in the collection are considered preferable for scheduling.
 *
 * @param split Input split.
 * @return IGFS affinity or {@code null} if IGFS is not available.
 * @throws IgniteCheckedException If failed.
 */
@Nullable private Collection<UUID> igfsAffinityNodesForSplit(HadoopInputSplit split) throws IgniteCheckedException {
    if (split instanceof HadoopFileBlock) {
        HadoopFileBlock split0 = (HadoopFileBlock) split;

        if (IgniteFileSystem.IGFS_SCHEME.equalsIgnoreCase(split0.file().getScheme())) {
            HadoopIgfsEndpoint endpoint = new HadoopIgfsEndpoint(split0.file().getAuthority());

            IgfsEx igfs = (IgfsEx) ((IgniteEx) ignite).igfsx(endpoint.igfs());

            if (igfs != null && !igfs.isProxy(split0.file())) {
                IgfsPath path = new IgfsPath(split0.file());

                if (igfs.exists(path)) {
                    Collection<IgfsBlockLocation> blocks;

                    try {
                        blocks = igfs.affinity(path, split0.start(), split0.length());
                    }
                    catch (IgniteException e) {
                        throw new IgniteCheckedException("Failed to get IGFS file block affinity [path=" + path +
                            ", start=" + split0.start() + ", len=" + split0.length() + ']', e);
                    }

                    assert blocks != null;

                    if (blocks.size() == 1)
                        return blocks.iterator().next().nodeIds();
                    else {
                        // The most "local" nodes go first.
                        Map<UUID, Long> idToLen = new HashMap<>();

                        for (IgfsBlockLocation block : blocks) {
                            for (UUID id : block.nodeIds()) {
                                Long len = idToLen.get(id);

                                idToLen.put(id, len == null ? block.length() : block.length() + len);
                            }
                        }

                        // Sort the nodes in non-ascending order by contained data lengths.
                        Map<NodeIdAndLength, UUID> res = new TreeMap<>();

                        for (Map.Entry<UUID, Long> idToLenEntry : idToLen.entrySet()) {
                            UUID id = idToLenEntry.getKey();

                            res.put(new NodeIdAndLength(id, idToLenEntry.getValue()), id);
                        }

                        return new LinkedHashSet<>(res.values());
                    }
                }
            }
        }
    }

    return null;
}
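As a worked illustration of the multi-block branch above (the node ids and block lengths here are made up, not taken from the source): if one node holds 128 MB of the split and another holds 64 MB, keying a TreeMap by NodeIdAndLength returns the 128 MB node first.

// Hypothetical data: two nodes holding different amounts of the split.
UUID nodeA = UUID.randomUUID();
UUID nodeB = UUID.randomUUID();

Map<UUID, Long> idToLen = new HashMap<>();

idToLen.put(nodeA, 64L * 1024 * 1024);  // Node A holds 64 MB.
idToLen.put(nodeB, 128L * 1024 * 1024); // Node B holds 128 MB.

Map<NodeIdAndLength, UUID> res = new TreeMap<>();

for (Map.Entry<UUID, Long> e : idToLen.entrySet())
    res.put(new NodeIdAndLength(e.getKey(), e.getValue()), e.getKey());

// Iteration order: nodeB (128 MB) first, then nodeA (64 MB).
Collection<UUID> ordered = new LinkedHashSet<>(res.values());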
Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class HadoopV2Job, method input.
/** {@inheritDoc} */
@Override public Collection<HadoopInputSplit> input() {
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // Assume that we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try {
            FileSystem fs = fileSystem(jobDir.toUri(), jobConf);

            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            if (e instanceof Error)
                throw (Error) e;
            else
                throw transformException(e);
        }
    }
    catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
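A consumer of input() only sees the HadoopInputSplit abstraction, whether the split came from a serialized job directory or was generated on the fly. A hypothetical usage sketch (the job variable and the printing are illustrative assumptions, not part of the Ignite source) that distinguishes file-backed blocks from opaque external splits:

// Hypothetical consumer: inspect the splits produced by HadoopV2Job.input().
Collection<HadoopInputSplit> splits = job.input();

for (HadoopInputSplit split : splits) {
    if (split instanceof HadoopFileBlock) {
        HadoopFileBlock block = (HadoopFileBlock) split;

        // File-backed split: file URI, start offset and length are known.
        System.out.println("File block: " + block.file() + " [start=" + block.start() + ", len=" + block.length() + ']');
    }
    else
        // Opaque split (e.g. HadoopExternalSplit): only the candidate hosts are exposed here.
        System.out.println("External split on hosts: " + Arrays.toString(split.hosts()));
}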
Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class HadoopV2Splitter, method splitJob.
/**
* @param ctx Job context.
* @return Collection of mapped splits.
* @throws IgniteCheckedException If mapping failed.
*/
public static Collection<HadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(), ctx.getConfiguration());

        assert format != null;

        List<InputSplit> splits = format.getSplits(ctx);

        Collection<HadoopInputSplit> res = new ArrayList<>(splits.size());

        int id = 0;

        for (InputSplit nativeSplit : splits) {
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            }
            else
                res.add(HadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

            id++;
        }

        return res;
    }
    catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
    catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}
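The mapping above preserves each FileSplit's locations, file URI, start offset and length. A standalone construction with made-up hosts and path (purely illustrative) mirrors what the loop does for a single FileSplit:

// Hypothetical example of the HadoopFileBlock produced for a 128 MB FileSplit at offset 0.
String[] hosts = {"host1.example.org", "host2.example.org"};
URI file = URI.create("hdfs://namenode.example.org/data/input.txt");

HadoopInputSplit split = new HadoopFileBlock(hosts, file, 0L, 128L * 1024 * 1024);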
Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class HadoopWeightedMapReducePlannerTest, method testHdfsSplitsReplication.
/**
* Test HDFS splits with Replication == 3.
*
* @throws Exception If failed.
*/
public void testHdfsSplitsReplication() throws Exception {
    IgfsMock igfs = LocationsBuilder.create().add(0, NODE_1).add(50, NODE_2).add(100, NODE_3).buildIgfs();

    final List<HadoopInputSplit> splits = new ArrayList<>();

    splits.add(new HadoopFileBlock(new String[] { HOST_1, HOST_2, HOST_3 }, URI.create("hfds://" + HOST_1 + "/x"), 0, 50));
    splits.add(new HadoopFileBlock(new String[] { HOST_2, HOST_3, HOST_4 }, URI.create("hfds://" + HOST_2 + "/x"), 50, 100));
    splits.add(new HadoopFileBlock(new String[] { HOST_3, HOST_4, HOST_5 }, URI.create("hfds://" + HOST_3 + "/x"), 100, 37));

    // The following splits belong to hosts that are entirely outside the Ignite topology.
    // This means that these splits should be assigned to the least loaded nodes:
    splits.add(new HadoopFileBlock(new String[] { HOST_4, HOST_5, HOST_1 }, URI.create("hfds://" + HOST_4 + "/x"), 138, 2));
    splits.add(new HadoopFileBlock(new String[] { HOST_5, HOST_1, HOST_2 }, URI.create("hfds://" + HOST_5 + "/x"), 140, 3));

    final int expReducers = 8;

    HadoopPlannerMockJob job = new HadoopPlannerMockJob(splits, expReducers);

    IgniteHadoopWeightedMapReducePlanner planner = createPlanner(igfs);

    final HadoopMapReducePlan plan = planner.preparePlan(job, NODES, null);

    checkPlanMappers(plan, splits, NODES, true);

    checkPlanReducers(plan, NODES, expReducers, true);
}