Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class JdbcInputFormatTest, method testSplitLogic_withSpillOver.
@Test
public void testSplitLogic_withSpillOver() throws HiveJdbcDatabaseAccessException, IOException {
  JdbcInputFormat f = new JdbcInputFormat();
  when(mockDatabaseAccessor.getTotalNumberOfRecords(any(Configuration.class))).thenReturn(15);
  f.setDbAccessor(mockDatabaseAccessor);
  JobConf conf = new JobConf();
  conf.set("mapred.input.dir", "/temp");
  InputSplit[] splits = f.getSplits(conf, 6);
  assertThat(splits, is(notNullValue()));
  assertThat(splits.length, is(6));
  for (int i = 0; i < 3; i++) {
    assertThat(splits[i].getLength(), is(3L));
  }
  for (int i = 3; i < 6; i++) {
    assertThat(splits[i].getLength(), is(2L));
  }
}
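The expected lengths follow from simple spillover arithmetic: 15 records over 6 splits gives a base size of 2 with a remainder of 3, and the remainder is spread one extra record at a time over the leading splits. A minimal sketch of that calculation, using illustrative names (numRecords, numSplits, splitSizes) rather than anything taken from JdbcInputFormat:

int numRecords = 15;
int numSplits = 6;
int base = numRecords / numSplits;      // 2
int spillOver = numRecords % numSplits; // 3
long[] splitSizes = new long[numSplits];
for (int i = 0; i < numSplits; i++) {
  // the first 'spillOver' splits carry one extra record
  splitSizes[i] = base + (i < spillOver ? 1 : 0);
}
// splitSizes is now {3, 3, 3, 2, 2, 2}, matching the assertions above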
Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class PTFRowContainer, method first.
@Override
public Row first() throws HiveException {
  Row r = super.first();
  if (blockInfos.size() > 0) {
    InputSplit[] inputSplits = getInputSplits();
    FileSplit fS = null;
    // The first block always starts in the first split.
    BlockInfo bI = blockInfos.get(0);
    bI.startingSplit = 0;
    int i = 1;
    bI = i < blockInfos.size() ? blockInfos.get(i) : null;
    // Walk the remaining splits; every block whose start offset lies before the
    // current split's start belongs to the previous split.
    for (int j = 1; j < inputSplits.length && bI != null; j++) {
      fS = (FileSplit) inputSplits[j];
      while (bI != null && bI.startOffset < fS.getStart()) {
        bI.startingSplit = j - 1;
        i++;
        bI = i < blockInfos.size() ? blockInfos.get(i) : null;
      }
    }
    // Any blocks left over start in the last split.
    while (i < blockInfos.size()) {
      bI = blockInfos.get(i);
      bI.startingSplit = inputSplits.length - 1;
      i++;
    }
  }
  currentReadBlockStartRow = 0;
  return r;
}
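To see what the loop computes, consider a made-up layout (blockStarts and splitStarts are illustrative arrays, not fields of PTFRowContainer): blocks beginning at byte offsets 0, 100, 250 and 400, and three file splits starting at bytes 0, 200 and 350. Replaying the same assignment in isolation yields startingSplit values of 0, 0, 1 and 2:

long[] blockStarts = { 0L, 100L, 250L, 400L };
long[] splitStarts = { 0L, 200L, 350L };
int[] startingSplit = new int[blockStarts.length];
startingSplit[0] = 0;  // the first block always starts in the first split
int i = 1;
for (int j = 1; j < splitStarts.length && i < blockStarts.length; j++) {
  while (i < blockStarts.length && blockStarts[i] < splitStarts[j]) {
    startingSplit[i++] = j - 1;  // block begins before this split, so it starts in the previous one
  }
}
while (i < blockStarts.length) {
  startingSplit[i++] = splitStarts.length - 1;  // any remaining blocks start in the last split
}
// startingSplit is now {0, 0, 1, 2}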
Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class HiveIndexedInputFormat, method doGetSplits.
public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException {
  super.init(job);
  Path[] dirs = FileInputFormat.getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }
  JobConf newjob = new JobConf(job);
  ArrayList<InputSplit> result = new ArrayList<InputSplit>();
  // for each dir, get the InputFormat, and do getSplits.
  PartitionDesc part;
  for (Path dir : dirs) {
    part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(pathToPartitionInfo, dir, IOPrepareCache.get().allocatePartitionDescMap(), true);
    // create a new InputFormat instance if this is the first time to see this
    // class
    Class inputFormatClass = part.getInputFileFormatClass();
    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);
    FileInputFormat.setInputPaths(newjob, dir);
    newjob.setInputFormat(inputFormat.getClass());
    InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
    for (InputSplit is : iss) {
      result.add(new HiveInputSplit(is, inputFormatClass.getName()));
    }
  }
  return result.toArray(new HiveInputSplit[result.size()]);
}
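Each split returned by the underlying format is wrapped in a HiveInputSplit tagged with the input format class name, and the overall split hint is shared across input directories by integer division (numSplits / dirs.length). A quick illustration of that division, with made-up numbers:

int numSplits = 10;
int numDirs = 4;
int perDirHint = numSplits / numDirs;  // 2: each directory's getSplits call is hinted with only 2 splits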
Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class BucketizedHiveInputSplit, method write.
@Override
public void write(DataOutput out) throws IOException {
  assert (inputSplits != null && inputSplits.length > 0);
  out.writeUTF(inputSplits[0].getClass().getName());
  out.writeInt(inputSplits.length);
  for (InputSplit inputSplit : inputSplits) {
    inputSplit.write(out);
  }
  out.writeUTF(inputFormatClassName);
}
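Because write records the concrete split class first, then the count, then each wrapped split, and finally the input format class name, the matching deserialization has to read those fields back in the same order. A minimal sketch of what such a readFields counterpart could look like; this is an assumption-laden illustration rather than Hive's actual BucketizedHiveInputSplit code, and the ReflectionUtils-based instantiation is an illustrative choice:

@Override
public void readFields(DataInput in) throws IOException {
  String splitClassName = in.readUTF();
  int numSplits = in.readInt();
  inputSplits = new InputSplit[numSplits];
  try {
    Class<? extends InputSplit> splitClass = Class.forName(splitClassName).asSubclass(InputSplit.class);
    for (int i = 0; i < numSplits; i++) {
      // create an empty split of the recorded class, then let it deserialize its own fields
      inputSplits[i] = ReflectionUtils.newInstance(splitClass, null);
      inputSplits[i].readFields(in);
    }
  } catch (ClassNotFoundException e) {
    throw new IOException("Cannot find split class " + splitClassName, e);
  }
  inputFormatClassName = in.readUTF();
}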
Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
The class NullRowsInputFormat, method getSplits.
@Override
public InputSplit[] getSplits(JobConf conf, int arg1) throws IOException {
  // It's important to read the correct nulls! (in truth, the path is needed for SplitGrouper).
  String[] paths = conf.getTrimmedStrings(FileInputFormat.INPUT_DIR, (String[]) null);
  if (paths == null)
    throw new IOException("Cannot find path in conf");
  InputSplit[] result = new InputSplit[paths.length];
  for (int i = 0; i < paths.length; ++i) {
    result[i] = new DummyInputSplit(paths[i]);
  }
  return result;
}
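In other words, the format produces exactly one DummyInputSplit per configured input directory and ignores the numSplits hint. A minimal usage sketch, with made-up paths:

NullRowsInputFormat format = new NullRowsInputFormat();
JobConf conf = new JobConf();
conf.set(FileInputFormat.INPUT_DIR, "/warehouse/t1,/warehouse/t2");
InputSplit[] splits = format.getSplits(conf, 10);
// one DummyInputSplit per path; the hint of 10 splits is ignored
assert splits.length == 2;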