Use of org.apache.hadoop.mapreduce.InputSplit in project akela by mozilla-metrics.
The class MultiScanTableInputFormat, method getSplits.
/* (non-Javadoc)
* @see org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)
*/
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    if (table == null) {
        throw new IOException("No table was provided.");
    }
    Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
    if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
        throw new IOException("Expecting at least one region.");
    }
    Set<InputSplit> splits = new HashSet<InputSplit>();
    for (int i = 0; i < keys.getFirst().length; i++) {
        String regionLocation = table.getRegionLocation(keys.getFirst()[i]).getServerAddress().getHostname();
        for (Scan s : scans) {
            byte[] startRow = s.getStartRow();
            byte[] stopRow = s.getStopRow();
            // determine if the given start and stop keys fall into the region
            if ((startRow.length == 0 || keys.getSecond()[i].length == 0
                    || Bytes.compareTo(startRow, keys.getSecond()[i]) < 0)
                    && (stopRow.length == 0 || Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
                // clip the scan range to the region boundaries
                byte[] splitStart = startRow.length == 0
                        || Bytes.compareTo(keys.getFirst()[i], startRow) >= 0
                        ? keys.getFirst()[i] : startRow;
                byte[] splitStop = (stopRow.length == 0
                        || Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0)
                        && keys.getSecond()[i].length > 0
                        ? keys.getSecond()[i] : stopRow;
                InputSplit split = new TableSplit(table.getTableName(), splitStart, splitStop, regionLocation);
                splits.add(split);
            }
        }
    }
    return new ArrayList<InputSplit>(splits);
}
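The intersection test above is the subtle part: an empty byte array means "unbounded" on that side of a row range. Below is a minimal, self-contained sketch of the same overlap logic, using java.util.Arrays.compareUnsigned as a stand-in for HBase's unsigned Bytes.compareTo; the class and method names are hypothetical.

import java.util.Arrays;

// Hypothetical standalone illustration of the region/scan intersection above.
public class RangeIntersection {

    // An empty array means "unbounded" on that side, as in HBase row ranges.
    static boolean overlaps(byte[] scanStart, byte[] scanStop, byte[] regionStart, byte[] regionStop) {
        boolean startsBeforeRegionEnds =
            scanStart.length == 0 || regionStop.length == 0
                || Arrays.compareUnsigned(scanStart, regionStop) < 0;
        boolean stopsAfterRegionStarts =
            scanStop.length == 0 || Arrays.compareUnsigned(scanStop, regionStart) > 0;
        return startsBeforeRegionEnds && stopsAfterRegionStarts;
    }

    public static void main(String[] args) {
        byte[] empty = new byte[0];
        // Scan [b, d) against region [a, c): overlapping.
        System.out.println(overlaps("b".getBytes(), "d".getBytes(), "a".getBytes(), "c".getBytes())); // true
        // Unbounded scan overlaps any region.
        System.out.println(overlaps(empty, empty, "a".getBytes(), "c".getBytes())); // true
        // Scan [d, e) against region [a, c): disjoint.
        System.out.println(overlaps("d".getBytes(), "e".getBytes(), "a".getBytes(), "c".getBytes())); // false
    }
}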
Use of org.apache.hadoop.mapreduce.InputSplit in project flink by apache.
The class HCatInputFormatBase, method createInputSplits.
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);
    JobContext jobContext = null;
    try {
        jobContext = HadoopUtils.instantiateJobContext(configuration, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    List<InputSplit> splits;
    try {
        splits = this.hCatInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }
    // Wrap each Hadoop InputSplit in a Flink HadoopInputSplit, preserving its index.
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
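The detail worth keeping is that each wrapped split carries its list index, which serves as the split number when work is assigned. A tiny generic sketch of that index-preserving wrap, with IndexedSplit as a hypothetical stand-in for HadoopInputSplit:

import java.util.Arrays;
import java.util.List;

public class IndexedWrapDemo {

    // Hypothetical stand-in for Flink's HadoopInputSplit: a payload plus its position.
    record IndexedSplit<T>(int splitNumber, T wrapped) { }

    static <T> IndexedSplit<T>[] wrap(List<T> items) {
        @SuppressWarnings("unchecked")
        IndexedSplit<T>[] out = new IndexedSplit[items.size()];
        for (int i = 0; i < out.length; i++) {
            out[i] = new IndexedSplit<>(i, items.get(i)); // index doubles as split number
        }
        return out;
    }

    public static void main(String[] args) {
        System.out.println(Arrays.toString(wrap(List.of("split-a", "split-b"))));
    }
}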
Use of org.apache.hadoop.mapreduce.InputSplit in project alluxio by Alluxio.
The class KeyValueInputFormat, method getSplits.
/**
* Returns a list of {@link KeyValueInputSplit} where each split is one key-value partition.
*
* @param jobContext MapReduce job configuration
* @return list of {@link InputSplit}s, each split is a partition
* @throws IOException if information about the partition cannot be retrieved
*/
@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
    // The paths are the MapReduce program's inputs specified in
    // {@code mapreduce.input.fileinputformat.inputdir}; each path should be a key-value store.
    Path[] paths = FileInputFormat.getInputPaths(jobContext);
    List<InputSplit> splits = new ArrayList<>();
    try {
        for (Path path : paths) {
            List<PartitionInfo> partitionInfos =
                mKeyValueMasterClient.getPartitionInfo(new AlluxioURI(path.toString()));
            for (PartitionInfo partitionInfo : partitionInfos) {
                splits.add(new KeyValueInputSplit(partitionInfo));
            }
        }
    } catch (AlluxioException e) {
        throw new IOException(e);
    }
    return splits;
}
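Structurally this is a flatten: every partition of every input path becomes exactly one split. A minimal sketch of that shape in plain Java (PartitionStub and the in-memory partition counts are hypothetical stand-ins for PartitionInfo and the mKeyValueMasterClient lookup):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class PartitionFlattenDemo {

    // Hypothetical stand-in for PartitionInfo / KeyValueInputSplit.
    record PartitionStub(String path, int id) { }

    static List<PartitionStub> toSplits(List<String> paths, Map<String, Integer> partitionCounts) {
        List<PartitionStub> splits = new ArrayList<>();
        for (String path : paths) {
            int count = partitionCounts.getOrDefault(path, 0);
            for (int id = 0; id < count; id++) {
                splits.add(new PartitionStub(path, id)); // one split per partition
            }
        }
        return splits;
    }

    public static void main(String[] args) {
        System.out.println(toSplits(List.of("/store-a", "/store-b"),
            Map.of("/store-a", 2, "/store-b", 1)));
    }
}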
Use of org.apache.hadoop.mapreduce.InputSplit in project beam by apache.
The class HadoopInputFormatIOTest, method testComputeSplitsIfGetSplitsReturnsEmptyList.
/**
* This test validates behavior of
* {@link HadoopInputFormatBoundedSource#computeSplitsIfNecessary() computeSplits()} when Hadoop
* InputFormat's {@link InputFormat#getSplits(JobContext)} returns an empty list.
*/
@Test
public void testComputeSplitsIfGetSplitsReturnsEmptyList() throws Exception {
    InputFormat<?, ?> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    SerializableSplit mockInputSplit = Mockito.mock(SerializableSplit.class);
    Mockito.when(mockInputFormat.getSplits(Mockito.any(JobContext.class)))
        .thenReturn(new ArrayList<InputSplit>());
    HadoopInputFormatBoundedSource<Text, Employee> hifSource =
        new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf,
            WritableCoder.of(Text.class),
            AvroCoder.of(Employee.class),
            null, // No key translation required.
            null, // No value translation required.
            mockInputSplit);
    thrown.expect(IOException.class);
    thrown.expectMessage("Error in computing splits, getSplits() returns a empty list");
    hifSource.setInputFormatObj(mockInputFormat);
    hifSource.computeSplitsIfNecessary();
}
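For context, the guard this test trips presumably resembles the following sketch (not Beam's actual implementation; validateSplits is a hypothetical name, though the message string is the one the test expects):

import java.io.IOException;
import java.util.List;

public class SplitValidationDemo {

    // Sketch of the empty-list guard the test above expects to trip.
    static <T> List<T> validateSplits(List<T> splits) throws IOException {
        if (splits == null || splits.isEmpty()) {
            throw new IOException("Error in computing splits, getSplits() returns a empty list");
        }
        return splits;
    }

    public static void main(String[] args) throws IOException {
        validateSplits(List.of()); // throws IOException with the message above
    }
}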
Use of org.apache.hadoop.mapreduce.InputSplit in project beam by apache.
The class HadoopInputFormatIOTest, method testComputeSplitsIfGetSplitsReturnsListHavingNullValues.
/**
* This test validates behavior of
* {@link HadoopInputFormatBoundedSource#computeSplitsIfNecessary() computeSplits()} if Hadoop
* InputFormat's {@link InputFormat#getSplits(JobContext) getSplits()} returns an InputSplit list
* containing null values.
*/
@Test
public void testComputeSplitsIfGetSplitsReturnsListHavingNullValues() throws Exception {
    // InputSplit list containing a null value.
    InputSplit mockInputSplit =
        Mockito.mock(InputSplit.class, Mockito.withSettings().extraInterfaces(Writable.class));
    List<InputSplit> inputSplitList = new ArrayList<InputSplit>();
    inputSplitList.add(mockInputSplit);
    inputSplitList.add(null);
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    Mockito.when(mockInputFormat.getSplits(Mockito.any(JobContext.class))).thenReturn(inputSplitList);
    HadoopInputFormatBoundedSource<Text, Employee> hifSource =
        new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf,
            WritableCoder.of(Text.class),
            AvroCoder.of(Employee.class),
            null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit());
    thrown.expect(IOException.class);
    thrown.expectMessage("Error in computing splits, split is null in InputSplits list populated "
        + "by getSplits() : ");
    hifSource.setInputFormatObj(mockInputFormat);
    hifSource.computeSplitsIfNecessary();
}
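Similarly, a sketch of the per-element null check this test targets (a hypothetical helper; in Beam the check lives inside computeSplitsIfNecessary):

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

public class NullSplitCheckDemo {

    // Sketch of the per-element null check the test above expects to trip.
    static <T> void rejectNullSplits(List<T> splits) throws IOException {
        for (T split : splits) {
            if (split == null) {
                throw new IOException(
                    "Error in computing splits, split is null in InputSplits list populated "
                        + "by getSplits() : " + splits);
            }
        }
    }

    public static void main(String[] args) throws IOException {
        rejectNullSplits(Arrays.asList("ok", null)); // throws
    }
}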