Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project asterixdb by apache.
Class SchedulerTest, method testSchedulerLargerHDFS:
/**
 * Test the case where the HDFS cluster is larger than the Hyracks cluster.
 *
 * @throws Exception
 */
public void testSchedulerLargerHDFS() throws Exception {
    Map<String, NodeControllerInfo> ncNameToNcInfos =
            TestUtils.generateNodeControllerInfo(6, "nc", "10.0.0.", 5099, 5098, 5097);
    List<InputSplit> fileSplits = new ArrayList<>();
    fileSplits.add(new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-2"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-3"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.6" }));
    fileSplits.add(new FileSplit(new Path("part-4"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.6" }));
    fileSplits.add(new FileSplit(new Path("part-5"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-7"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-8"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-9"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.6" }));
    fileSplits.add(new FileSplit(new Path("part-10"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.6" }));
    fileSplits.add(new FileSplit(new Path("part-11"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.7" }));
    fileSplits.add(new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" }));
    Scheduler scheduler = new Scheduler(ncNameToNcInfos);
    String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
    String[] expectedResults =
            new String[] { "nc1", "nc4", "nc6", "nc1", "nc4", "nc2", "nc2", "nc3", "nc6", "nc5", "nc3", "nc5" };
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
}
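For orientation, here is a minimal, hedged sketch of the FileSplit API the test builds on: each split is constructed with a path, a byte offset, a length, and the hosts that hold the block, and getLocations() hands that host list back so a scheduler can place readers near the data. The class name, path, and addresses below are illustrative, not part of the test.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class FileSplitLocationsSketch {
    public static void main(String[] args) throws IOException {
        // path, start offset, length, and the hosts that hold the block replicas
        FileSplit split = new FileSplit(new Path("part-1"), 0, 0,
                new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
        // getLocations() returns the host list; a scheduler such as the one in
        // the test above reads it to choose a nearby node for each split
        for (String host : split.getLocations()) {
            System.out.println(host);
        }
    }
}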
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project asterixdb by apache.
Class FileSplitsFactory, method bytesToSplits:
/**
 * Convert bytes to splits.
 *
 * @param bytes the serialized form of a list of file splits
 * @return the deserialized list of file splits
 * @throws HyracksDataException
 */
private List<FileSplit> bytesToSplits(byte[] bytes) throws HyracksDataException {
    try {
        Class<?> splitClass = Class.forName(splitClassName);
        Constructor<?>[] constructors = splitClass.getDeclaredConstructors();
        Constructor<?> defaultConstructor = null;
        // find the no-argument constructor and make it accessible
        for (Constructor<?> constructor : constructors) {
            if (constructor.getParameterTypes().length == 0) {
                constructor.setAccessible(true);
                defaultConstructor = constructor;
            }
        }
        ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
        DataInputStream dis = new DataInputStream(bis);
        // the split count is written first, followed by each split's fields
        int size = dis.readInt();
        List<FileSplit> splits = new ArrayList<>();
        for (int i = 0; i < size; i++) {
            splits.add((FileSplit) defaultConstructor.newInstance());
            splits.get(i).readFields(dis);
        }
        dis.close();
        return splits;
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }
}
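For context, a minimal sketch of the opposite direction (splits to bytes), assuming the splits are org.apache.hadoop.mapreduce.lib.input.FileSplit instances, which implement Writable. The method name and shape are illustrative and need not match the factory's actual serializer; it only mirrors the layout that bytesToSplits() above expects.

private byte[] splitsToBytes(List<FileSplit> splits) throws HyracksDataException {
    try {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        // write the count first so bytesToSplits() knows how many splits to read back
        dos.writeInt(splits.size());
        for (FileSplit split : splits) {
            // FileSplit implements Writable, so it can serialize its own fields
            split.write(dos);
        }
        dos.close();
        return bos.toByteArray();
    } catch (IOException e) {
        throw new HyracksDataException(e);
    }
}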
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project carbondata by apache.
Class CarbonInputFormat, method getSplitsInternal:
private List<InputSplit> getSplitsInternal(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    List<InputSplit> carbonSplits = new ArrayList<InputSplit>(splits.size());
    // identify table blocks
    for (InputSplit inputSplit : splits) {
        FileSplit fileSplit = (FileSplit) inputSplit;
        String segmentId = CarbonTablePath.DataPathUtil.getSegmentId(fileSplit.getPath().toString());
        if (segmentId.equals(CarbonCommonConstants.INVALID_SEGMENT_ID)) {
            continue;
        }
        carbonSplits.add(CarbonInputSplit.from(segmentId, fileSplit,
                ColumnarFormatVersion.valueOf(CarbonCommonConstants.CARBON_DATA_FILE_DEFAULT_VERSION)));
    }
    return carbonSplits;
}
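The unconditional cast above assumes that super.getSplits(job), inherited from Hadoop's FileInputFormat, only ever returns FileSplit instances. A hedged helper with an illustrative name that makes this assumption explicit instead of relying on a ClassCastException:

private static FileSplit asFileSplit(InputSplit split) {
    // FileInputFormat normally emits FileSplits; fail with a clear message otherwise
    if (split instanceof FileSplit) {
        return (FileSplit) split;
    }
    throw new IllegalStateException("Expected a FileSplit from getSplits(), got " + split.getClass().getName());
}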
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project gora by apache.
Class FileBackedDataStoreBase, method getPartitions:
@Override
public List<PartitionQuery<K, T>> getPartitions(Query<K, T> query) {
    List<InputSplit> splits = null;
    List<PartitionQuery<K, T>> queries = null;
    try {
        splits = GoraMapReduceUtils.getSplits(getConf(), inputPath);
        queries = new ArrayList<>(splits.size());
        for (InputSplit split : splits) {
            queries.add(new FileSplitPartitionQuery<>(query, (FileSplit) split));
        }
    } catch (IOException ex) {
        LOG.error(ex.getMessage(), ex);
    }
    return queries;
}
Use of org.apache.hadoop.mapreduce.lib.input.FileSplit in project ignite by apache.
Class HadoopV2Splitter, method splitJob:
/**
 * @param ctx Job context.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(), ctx.getConfiguration());
        assert format != null;
        List<InputSplit> splits = format.getSplits(ctx);
        Collection<HadoopInputSplit> res = new ArrayList<>(splits.size());
        int id = 0;
        for (InputSplit nativeSplit : splits) {
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;
                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            } else {
                res.add(HadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));
            }
            id++;
        }
        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IgniteInterruptedCheckedException(e);
    }
}
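The only Hadoop-specific ingredients in splitJob are the reflective instantiation of the job's InputFormat and the FileSplit accessors (getPath(), getStart(), getLength(), getLocations()). A standalone, hedged sketch of that pattern outside Ignite; the class name, input path, and chosen TextInputFormat are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.ReflectionUtils;

public class SplitListingSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/tmp/input")); // illustrative input path
        // instantiate the configured InputFormat reflectively, as splitJob() does
        InputFormat<?, ?> format =
                ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
        for (InputSplit split : format.getSplits(job)) {
            if (split instanceof FileSplit) {
                FileSplit fs = (FileSplit) split;
                // the same fields splitJob() copies into a HadoopFileBlock
                System.out.println(fs.getPath() + " @" + fs.getStart() + "+" + fs.getLength());
            }
        }
    }
}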