Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class IgniteHadoopWeightedMapReducePlanner, method igfsAffinityNodesForSplit.
/**
 * Gets IGFS affinity nodes for the split, if possible.
 * <p>
 * Order in the returned collection is significant: nodes containing more of the split's data
 * go first, so the first nodes in the collection are preferable for scheduling.
 *
 * @param split Input split.
 * @return IGFS affinity nodes or {@code null} if IGFS is not available.
 * @throws IgniteCheckedException If failed.
 */
@Nullable
private Collection<UUID> igfsAffinityNodesForSplit(HadoopInputSplit split) throws IgniteCheckedException {
    if (split instanceof HadoopFileBlock) {
        HadoopFileBlock split0 = (HadoopFileBlock) split;

        if (IgniteFileSystem.IGFS_SCHEME.equalsIgnoreCase(split0.file().getScheme())) {
            HadoopIgfsEndpoint endpoint = new HadoopIgfsEndpoint(split0.file().getAuthority());

            IgfsEx igfs = (IgfsEx) ((IgniteEx) ignite).igfsx(endpoint.igfs());

            if (igfs != null && !igfs.isProxy(split0.file())) {
                IgfsPath path = new IgfsPath(split0.file());

                if (igfs.exists(path)) {
                    Collection<IgfsBlockLocation> blocks;

                    try {
                        blocks = igfs.affinity(path, split0.start(), split0.length());
                    }
                    catch (IgniteException e) {
                        throw new IgniteCheckedException("Failed to get IGFS file block affinity [path=" + path +
                            ", start=" + split0.start() + ", len=" + split0.length() + ']', e);
                    }

                    assert blocks != null;

                    if (blocks.size() == 1)
                        return blocks.iterator().next().nodeIds();
                    else {
                        // The most "local" nodes go first.
                        Map<UUID, Long> idToLen = new HashMap<>();

                        for (IgfsBlockLocation block : blocks) {
                            for (UUID id : block.nodeIds()) {
                                Long len = idToLen.get(id);

                                idToLen.put(id, len == null ? block.length() : block.length() + len);
                            }
                        }

                        // Sort the nodes in non-ascending order by contained data lengths.
                        Map<NodeIdAndLength, UUID> res = new TreeMap<>();

                        for (Map.Entry<UUID, Long> idToLenEntry : idToLen.entrySet()) {
                            UUID id = idToLenEntry.getKey();

                            res.put(new NodeIdAndLength(id, idToLenEntry.getValue()), id);
                        }

                        return new LinkedHashSet<>(res.values());
                    }
                }
            }
        }
    }

    return null;
}
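The NodeIdAndLength key used above is not shown in this snippet. The following is a minimal sketch that uses a hypothetical NodeWeightKey class to illustrate the same ordering idea: a TreeMap keyed by (data length descending, node id) yields the most "local" nodes first. It is an illustration only, not the planner's actual inner class.

import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.TreeMap;
import java.util.UUID;

// Hypothetical key class (not Ignite's NodeIdAndLength): sorts node ids by the
// amount of contained data in descending order, falling back to the node id so
// the ordering is total and deterministic.
class NodeWeightKey implements Comparable<NodeWeightKey> {
    private final UUID id;
    private final long len;

    NodeWeightKey(UUID id, long len) {
        this.id = id;
        this.len = len;
    }

    @Override public int compareTo(NodeWeightKey other) {
        int cmp = Long.compare(other.len, len); // More data first.

        return cmp != 0 ? cmp : id.compareTo(other.id);
    }
}

class MostLocalFirstExample {
    /** Returns node ids ordered so that nodes holding more bytes of the split come first. */
    static Collection<UUID> mostLocalFirst(Map<UUID, Long> idToLen) {
        Map<NodeWeightKey, UUID> res = new TreeMap<>();

        for (Map.Entry<UUID, Long> e : idToLen.entrySet())
            res.put(new NodeWeightKey(e.getKey(), e.getValue()), e.getKey());

        return new LinkedHashSet<>(res.values());
    }
}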
Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class HadoopTasksAllVersionsTest, method testAllTasks.
/**
 * Tests the whole job pipeline: runs two chains of map-combine tasks and feeds their
 * results into a single reduce task.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("ConstantConditions")
public void testAllTasks() throws Exception {
    IgfsPath inDir = new IgfsPath(PATH_INPUT);

    igfs.mkdirs(inDir);

    IgfsPath inFile = new IgfsPath(inDir, HadoopWordCount2.class.getSimpleName() + "-input");

    URI inFileUri = URI.create(igfsScheme() + inFile.toString());

    generateTestFile(inFile.toString(), "red", 100, "blue", 200, "green", 150, "yellow", 70);

    // Split the file into two blocks.
    long fileLen = igfs.info(inFile).length();

    Long l = fileLen / 2;

    HadoopFileBlock fileBlock1 = new HadoopFileBlock(HOSTS, inFileUri, 0, l);
    HadoopFileBlock fileBlock2 = new HadoopFileBlock(HOSTS, inFileUri, l, fileLen - l);

    HadoopJobEx gridJob = getHadoopJob(inFileUri.toString(), igfsScheme() + PATH_OUTPUT);

    HadoopTestTaskContext combine1Ctx = runMapCombineTask(fileBlock1, gridJob);
    HadoopTestTaskContext combine2Ctx = runMapCombineTask(fileBlock2, gridJob);

    // Prepare input for the reduce task from both combine outputs.
    HadoopTaskInfo taskInfo = new HadoopTaskInfo(HadoopTaskType.REDUCE, gridJob.id(), 0, 0, null);

    HadoopTestTaskContext reduceCtx = new HadoopTestTaskContext(taskInfo, gridJob);

    reduceCtx.makeTreeOfWritables(combine1Ctx.mockOutput());
    reduceCtx.makeTreeOfWritables(combine2Ctx.mockOutput());

    reduceCtx.run();

    reduceCtx.taskInfo(new HadoopTaskInfo(HadoopTaskType.COMMIT, gridJob.id(), 0, 0, null));

    reduceCtx.run();

    assertEquals(
        "blue\t200\n" +
        "green\t150\n" +
        "red\t100\n" +
        "yellow\t70\n",
        readAndSortFile(PATH_OUTPUT + "/" + getOutputFileNamePrefix() + "00000"));
}
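The test above splits the input file into exactly two HadoopFileBlock instances. As a rough generalization of that pattern, the sketch below builds N blocks of roughly equal size for a file of known length; the hosts array and file URI are placeholders supplied by the caller, and the helper itself is not part of the Ignite code base.

import java.net.URI;
import java.util.ArrayList;
import java.util.List;

import org.apache.ignite.internal.processors.hadoop.HadoopFileBlock;

// Illustrative helper only: tiles a file of the given length with 'blocks'
// HadoopFileBlock splits of roughly equal size.
class FileBlockSplitter {
    static List<HadoopFileBlock> split(String[] hosts, URI file, long fileLen, int blocks) {
        List<HadoopFileBlock> res = new ArrayList<>(blocks);

        long blockLen = fileLen / blocks;

        for (int i = 0; i < blocks; i++) {
            long start = i * blockLen;

            // The last block absorbs the remainder so the splits cover the whole file.
            long len = (i == blocks - 1) ? fileLen - start : blockLen;

            res.add(new HadoopFileBlock(hosts, file, start, len));
        }

        return res;
    }
}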
Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class HadoopTasksAllVersionsTest, method testMapTask.
/**
 * Tests map task execution.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("ConstantConditions")
public void testMapTask() throws Exception {
    IgfsPath inDir = new IgfsPath(PATH_INPUT);

    igfs.mkdirs(inDir);

    IgfsPath inFile = new IgfsPath(inDir, HadoopWordCount2.class.getSimpleName() + "-input");

    URI inFileUri = URI.create(igfsScheme() + inFile.toString());

    try (PrintWriter pw = new PrintWriter(igfs.create(inFile, true))) {
        pw.println("hello0 world0");
        pw.println("world1 hello1");
    }

    HadoopFileBlock fileBlock1 = new HadoopFileBlock(HOSTS, inFileUri, 0, igfs.info(inFile).length() - 1);

    try (PrintWriter pw = new PrintWriter(igfs.append(inFile, false))) {
        pw.println("hello2 world2");
        pw.println("world3 hello3");
    }

    HadoopFileBlock fileBlock2 = new HadoopFileBlock(HOSTS, inFileUri, fileBlock1.length(),
        igfs.info(inFile).length() - fileBlock1.length());

    HadoopJobEx gridJob = getHadoopJob(igfsScheme() + inFile.toString(), igfsScheme() + PATH_OUTPUT);

    HadoopTaskInfo taskInfo = new HadoopTaskInfo(HadoopTaskType.MAP, gridJob.id(), 0, 0, fileBlock1);

    HadoopTestTaskContext ctx = new HadoopTestTaskContext(taskInfo, gridJob);

    ctx.mockOutput().clear();

    ctx.run();

    assertEquals("hello0,1; world0,1; world1,1; hello1,1", Joiner.on("; ").join(ctx.mockOutput()));

    ctx.mockOutput().clear();

    ctx.taskInfo(new HadoopTaskInfo(HadoopTaskType.MAP, gridJob.id(), 0, 0, fileBlock2));

    ctx.run();

    assertEquals("hello2,1; world2,1; world3,1; hello3,1", Joiner.on("; ").join(ctx.mockOutput()));
}
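The two blocks above are constructed so that fileBlock2 starts where fileBlock1 ends and the pair covers the whole appended file. A small, purely illustrative check of that invariant (not part of the test) could look like this:

import org.apache.ignite.internal.processors.hadoop.HadoopFileBlock;

// Illustrative invariant check: two consecutive splits over the same file
// should tile it without gaps or overlap.
class SplitCoverageCheck {
    static boolean tilesFile(HadoopFileBlock first, HadoopFileBlock second, long fileLen) {
        return first.start() == 0
            && second.start() == first.start() + first.length()
            && second.start() + second.length() == fileLen;
    }
}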
Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class HadoopWeightedMapReducePlannerTest, method testHdfsSplitsAffinity.
/**
 * Tests affinity for HDFS splits.
 *
 * @throws Exception If failed.
 */
public void testHdfsSplitsAffinity() throws Exception {
    IgfsMock igfs = LocationsBuilder.create().add(0, NODE_1).add(50, NODE_2).add(100, NODE_3).buildIgfs();

    final List<HadoopInputSplit> splits = new ArrayList<>();

    splits.add(new HadoopFileBlock(new String[] { HOST_1 }, URI.create("hfds://" + HOST_1 + "/x"), 0, 50));
    splits.add(new HadoopFileBlock(new String[] { HOST_2 }, URI.create("hfds://" + HOST_2 + "/x"), 50, 100));
    splits.add(new HadoopFileBlock(new String[] { HOST_3 }, URI.create("hfds://" + HOST_3 + "/x"), 100, 37));

    // The following splits belong to hosts that are not in the Ignite topology at all.
    // This means that these splits should be assigned to any of the least loaded nodes:
    splits.add(new HadoopFileBlock(new String[] { HOST_4 }, URI.create("hfds://" + HOST_4 + "/x"), 138, 2));
    splits.add(new HadoopFileBlock(new String[] { HOST_5 }, URI.create("hfds://" + HOST_5 + "/x"), 140, 3));

    final int expReducers = 7;

    HadoopPlannerMockJob job = new HadoopPlannerMockJob(splits, expReducers);

    IgniteHadoopWeightedMapReducePlanner planner = createPlanner(igfs);

    final HadoopMapReducePlan plan = planner.preparePlan(job, NODES, null);

    checkPlanMappers(plan, splits, NODES, true);

    checkPlanReducers(plan, NODES, expReducers, true);
}
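The planner distinguishes splits whose declared hosts are part of the Ignite topology from splits whose hosts (HOST_4 and HOST_5 above) are not, and assigns the latter to the least loaded nodes. The sketch below illustrates only that classification step, assuming HadoopInputSplit exposes its declared host names via hosts(); it is not the planner's actual logic.

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.ignite.internal.processors.hadoop.HadoopInputSplit;

// Illustration only: partition splits by whether any of their declared hosts
// (assumed accessor: hosts()) is present in the Ignite topology.
class SplitLocalityClassifier {
    static Map<Boolean, List<HadoopInputSplit>> byLocality(Collection<HadoopInputSplit> splits,
        Set<String> topologyHosts) {
        Map<Boolean, List<HadoopInputSplit>> res = new HashMap<>();

        res.put(true, new ArrayList<HadoopInputSplit>());
        res.put(false, new ArrayList<HadoopInputSplit>());

        for (HadoopInputSplit split : splits) {
            boolean local = false;

            for (String host : split.hosts()) {
                if (topologyHosts.contains(host)) {
                    local = true;

                    break;
                }
            }

            res.get(local).add(split);
        }

        return res;
    }
}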
Use of org.apache.ignite.internal.processors.hadoop.HadoopFileBlock in project ignite by apache.
The class HadoopV2Context, method getInputSplit.
/** {@inheritDoc} */
@Override public InputSplit getInputSplit() {
    if (inputSplit == null) {
        HadoopInputSplit split = ctx.taskInfo().inputSplit();

        if (split == null)
            return null;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock fileBlock = (HadoopFileBlock) split;

            inputSplit = new FileSplit(new Path(fileBlock.file()), fileBlock.start(), fileBlock.length(), null);
        }
        else {
            try {
                inputSplit = (InputSplit) ((HadoopV2TaskContext) ctx).getNativeSplit(split);
            }
            catch (IgniteCheckedException e) {
                throw new IllegalStateException(e);
            }
        }
    }

    return inputSplit;
}
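getInputSplit() above converts an Ignite HadoopFileBlock into a Hadoop FileSplit. For illustration, the reverse conversion could be sketched as follows; this helper is not part of Ignite and simply shows how the two split representations line up field by field.

import java.io.IOException;

import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.ignite.internal.processors.hadoop.HadoopFileBlock;

// Illustrative reverse conversion: wrap a Hadoop FileSplit back into an Ignite HadoopFileBlock.
class SplitConverter {
    static HadoopFileBlock toFileBlock(FileSplit split) throws IOException, InterruptedException {
        return new HadoopFileBlock(
            split.getLocations(),    // Declared hosts of the split.
            split.getPath().toUri(), // File URI.
            split.getStart(),        // Offset of the block within the file.
            split.getLength());      // Block length in bytes.
    }
}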