use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project mongo-hadoop by mongodb.
the class BSONSplitterTest method testGetStartingPositionForSplit.
@Test
public void testGetStartingPositionForSplit() throws IOException {
    String inventoryPathString = INVENTORY_BSON.toString();
    Path inventoryPath = new Path(inventoryPathString);
    Configuration conf = new Configuration();
    BSONSplitter splitter = new BSONSplitter();
    splitter.setInputPath(inventoryPath);
    // This is a very small value for maxsize and will result in many
    // splits being created.
    conf.setLong("mapreduce.input.fileinputformat.split.maxsize", 2000L);
    splitter.setConf(conf);
    // Test without splits file.
    FileSplit fileSplit = new FileSplit(inventoryPath, 2000L, 100L, new String[] {});
    // Writing the split file is enabled by default, so this will cause
    // the BSONSplitter to create the splits file for later tests.
    assertEquals(2130L, splitter.getStartingPositionForSplit(fileSplit));
    // Test with splits file, which was created by the previous call to
    // getStartingPositionForSplit.
    assertEquals(2130L, splitter.getStartingPositionForSplit(fileSplit));
    // Test with reading the splits file disabled.
    MongoConfigUtil.setBSONReadSplits(conf, false);
    assertEquals(2130L, splitter.getStartingPositionForSplit(fileSplit));
}
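The test works against the standard accessors of org.apache.hadoop.mapreduce.lib.input.FileSplit. A minimal sketch of those accessors follows; the path and offsets are hypothetical and not taken from the test:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class FileSplitAccessorsSketch {
    public static void main(String[] args) throws Exception {
        // A split covering bytes [2000, 2100) of a hypothetical BSON file, with no preferred hosts.
        FileSplit split = new FileSplit(new Path("/data/inventory.bson"), 2000L, 100L, new String[] {});
        System.out.println(split.getPath());   // /data/inventory.bson
        System.out.println(split.getStart());  // 2000
        System.out.println(split.getLength()); // 100
        // getLocations() returns the preferred hosts passed to the constructor (none here).
        System.out.println(split.getLocations().length);
    }
}

The assertions above expect 2130 rather than the raw split offset 2000, presumably because getStartingPositionForSplit advances past the document straddling the split boundary to the start of the next whole BSON document.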
use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project Plume by tdunning.
the class MSCRMapper method map.
@SuppressWarnings("unchecked")
protected void map(WritableComparable key, WritableComparable value, final Mapper<WritableComparable, WritableComparable, PlumeObject, PlumeObject>.Context context) throws IOException, InterruptedException {
    LazyCollection<?> l = null;
    FileSplit fS = FileInputSplitWrapper.getFileInputSplit(context);
    // Get the LazyCollection for this input (according to the FileSplit)
    for (PCollection<?> input : mscr.getInputs()) {
        LazyCollection<?> thisL = (LazyCollection<?>) input;
        if (thisL.getFile() == null) {
            // Convention for intermediate results
            thisL.setFile(tmpFolder + "/" + thisL.getPlumeId());
        }
        if (fS.getPath().toString().startsWith(thisL.getFile()) || fS.getPath().toString().startsWith("file:" + thisL.getFile())) {
            l = thisL;
            break;
        }
    }
    if (l == null) {
        throw new RuntimeException("Unable to match input split with any MSCR input");
    }
    // If this collection is a table -> process Pair, otherwise process value
    PCollectionType type = l.getType();
    Object toProcess = value;
    if (type instanceof PTableType) {
        toProcess = Pair.create(key, value);
    }
    for (DeferredOp op : l.getDownOps()) {
        if (op instanceof MultipleParallelDo) {
            MultipleParallelDo mPDo = ((MultipleParallelDo) op);
            for (Object entry : mPDo.getDests().entrySet()) {
                Map.Entry<PCollection, DoFn> en = (Map.Entry<PCollection, DoFn>) entry;
                LazyCollection<?> lCol = (LazyCollection<?>) en.getKey();
                DeferredOp childOp = null;
                if (lCol.getDownOps() != null && lCol.getDownOps().size() > 0) {
                    childOp = lCol.getDownOps().get(0);
                }
                final Integer channel;
                if (childOp != null && childOp instanceof Flatten) {
                    channel = mscr.getNumberedChannels().get(((Flatten) childOp).getDest());
                } else if (childOp != null && childOp instanceof GroupByKey) {
                    channel = mscr.getNumberedChannels().get(((GroupByKey) childOp).getOrigin());
                } else {
                    // bypass channel?
                    channel = mscr.getNumberedChannels().get(en.getKey());
                }
                if (channel == null) {
                    // This is not for this MSCR - just skip it
                    return;
                }
                // Call the parallelDo function
                en.getValue().process(toProcess, new EmitFn() {

                    @Override
                    public void emit(Object v) {
                        try {
                            if (v instanceof Pair) {
                                Pair p = (Pair) v;
                                context.write(new PlumeObject((WritableComparable) p.getKey(), channel), new PlumeObject((WritableComparable) p.getValue(), channel));
                            } else {
                                context.write(new PlumeObject((WritableComparable) v, channel), new PlumeObject((WritableComparable) v, channel));
                            }
                        } catch (Exception e) {
                            // TODO How to report this
                            e.printStackTrace();
                        }
                    }
                });
            }
        } else {
            if (op instanceof Flatten) {
                l = (LazyCollection) ((Flatten) op).getDest();
            }
            int channel = mscr.getNumberedChannels().get(l);
            if (toProcess instanceof Pair) {
                context.write(new PlumeObject(key, channel), new PlumeObject(value, channel));
            } else {
                context.write(new PlumeObject(value, channel), new PlumeObject(value, channel));
            }
        }
    }
}
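Plume obtains the split through its own FileInputSplitWrapper helper; with the plain mapreduce API the current FileSplit can be read directly from the mapper context via getInputSplit(). A minimal sketch, not taken from Plume, with illustrative class name and key/value types:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Hypothetical mapper that tags each record with the name of the file it came from.
public class SplitAwareMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // For FileInputFormat-based jobs the InputSplit handed to the task is a FileSplit,
        // so this cast is safe in that setting.
        FileSplit split = (FileSplit) context.getInputSplit();
        String fileName = split.getPath().getName();
        context.write(new Text(fileName), value);
    }
}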
use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project hadoop-pcap by RIPE-NCC.
the class PcapInputFormat method createRecordReader.
@Override
public RecordReader<LongWritable, ObjectWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();
    LOG.info("Reading PCAP: " + path.toString());
    long start = 0L;
    long length = fileSplit.getLength();
    return initPcapRecordReader(path, start, length, context);
}
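Note that start is hard-coded to 0 rather than taken from fileSplit.getStart(), so each split is evidently read from the beginning of its PCAP file. As a rough sketch of how a reader typically uses such values to open the underlying file (this is not the hadoop-pcap reader; the helper class is hypothetical):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SplitOpener {

    // Returns a stream positioned at the first byte of the given split.
    public static FSDataInputStream open(FileSplit split, Configuration conf) throws IOException {
        Path path = split.getPath();
        FileSystem fs = path.getFileSystem(conf);
        FSDataInputStream in = fs.open(path);
        in.seek(split.getStart());
        return in;
    }
}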
use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project asterixdb by apache.
the class SchedulerTest method testSchedulerSmallerHDFSOdd.
/**
 * Test the case where the HDFS cluster is smaller than the Hyracks cluster.
 *
 * @throws Exception
 */
public void testSchedulerSmallerHDFSOdd() throws Exception {
    Map<String, NodeControllerInfo> ncNameToNcInfos = TestUtils.generateNodeControllerInfo(6, "nc", "10.0.0.", 5099, 5098, 5097);
    List<InputSplit> fileSplits = new ArrayList<>();
    fileSplits.add(new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-2"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-3"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-4"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-5"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-7"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-8"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-9"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.1" }));
    fileSplits.add(new FileSplit(new Path("part-10"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.2" }));
    fileSplits.add(new FileSplit(new Path("part-11"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-13"), 0, 0, new String[] { "10.0.0.2", "10.0.0.4", "10.0.0.5" }));
    Scheduler scheduler = new Scheduler(ncNameToNcInfos);
    String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
    String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc5", "nc1", "nc5", "nc2", "nc4" };
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
}
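The location constraints computed above are driven by the host arrays passed to each FileSplit constructor; a scheduler reads them back through getLocations(). A minimal sketch with an illustrative path and host list:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SplitLocationsSketch {
    public static void main(String[] args) throws Exception {
        InputSplit split = new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
        // getLocations() exposes the replica hosts that locality-aware schedulers consult.
        for (String host : split.getLocations()) {
            System.out.println(host);
        }
    }
}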
use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project asterixdb by apache.
the class SchedulerTest method testSchedulerSimple.
/**
 * Test the scheduler for the case where the Hyracks cluster and the HDFS cluster are the same set of nodes.
 *
 * @throws Exception
 */
public void testSchedulerSimple() throws Exception {
    Map<String, NodeControllerInfo> ncNameToNcInfos = TestUtils.generateNodeControllerInfo(6, "nc", "10.0.0.", 5099, 5098, 5097);
    List<InputSplit> fileSplits = new ArrayList<>();
    fileSplits.add(new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-2"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-3"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.6" }));
    fileSplits.add(new FileSplit(new Path("part-4"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.6" }));
    fileSplits.add(new FileSplit(new Path("part-5"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" }));
    Scheduler scheduler = new Scheduler(ncNameToNcInfos);
    String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
    String[] expectedResults = new String[] { "nc1", "nc4", "nc6", "nc2", "nc3", "nc5" };
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
}
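Both scheduler tests construct their FileSplits by hand; in a real job the splits normally come from an InputFormat. A hedged sketch, assuming a TextInputFormat job over a hypothetical HDFS input directory:

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ListSplitsSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // Hypothetical input directory; getSplits() computes splits from the files' block locations.
        FileInputFormat.addInputPath(job, new Path("hdfs:///data/input"));
        List<InputSplit> splits = new TextInputFormat().getSplits(job);
        for (InputSplit split : splits) {
            System.out.println(split + " hosts=" + String.join(",", split.getLocations()));
        }
    }
}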