Search in sources :

Example 96 with InputSplit

use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class JdbcInputFormatTest, method testSplitLogic_withSpillOver:

/**
 * Verifies split sizing when the row count does not divide evenly:
 * 15 records over 6 requested splits must produce three splits of
 * length 3 (absorbing the spill-over) followed by three of length 2.
 */
@Test
public void testSplitLogic_withSpillOver() throws HiveJdbcDatabaseAccessException, IOException {
    JdbcInputFormat inputFormat = new JdbcInputFormat();
    when(mockDatabaseAccessor.getTotalNumberOfRecords(any(Configuration.class))).thenReturn(15);
    inputFormat.setDbAccessor(mockDatabaseAccessor);
    JobConf jobConf = new JobConf();
    jobConf.set("mapred.input.dir", "/temp");
    InputSplit[] splits = inputFormat.getSplits(jobConf, 6);
    assertThat(splits, is(notNullValue()));
    assertThat(splits.length, is(6));
    // First 15 % 6 = 3 splits each take one extra record.
    for (int idx = 0; idx < 6; idx++) {
        long expectedLength = idx < 3 ? 3L : 2L;
        assertThat(splits[idx].getLength(), is(expectedLength));
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) Test(org.junit.Test)

Example 97 with InputSplit

use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class PTFRowContainer, method first:

/**
 * Returns the first row of the container and, as a side effect, recomputes
 * for every buffered block the index of the input split that block starts
 * in (stored in {@code BlockInfo.startingSplit}).
 *
 * <p>NOTE(review): assumes {@code blockInfos} is ordered by ascending
 * {@code startOffset} and that the splits returned by
 * {@code getInputSplits()} are in file order — confirm against callers.
 */
@Override
public Row first() throws HiveException {
    Row r = super.first();
    if (blockInfos.size() > 0) {
        InputSplit[] inputSplits = getInputSplits();
        FileSplit fS = null;
        // The first block always begins in the first split.
        BlockInfo bI = blockInfos.get(0);
        bI.startingSplit = 0;
        int i = 1;
        bI = i < blockInfos.size() ? blockInfos.get(i) : null;
        // Walk splits and blocks in tandem: any block whose startOffset is
        // still below split j's start offset must begin inside split j-1.
        for (int j = 1; j < inputSplits.length && bI != null; j++) {
            fS = (FileSplit) inputSplits[j];
            while (bI != null && bI.startOffset < fS.getStart()) {
                bI.startingSplit = j - 1;
                i++;
                bI = i < blockInfos.size() ? blockInfos.get(i) : null;
            }
        }
        // Blocks not consumed above start at/after the last split's offset,
        // so they belong to the final split.
        while (i < blockInfos.size()) {
            bI = blockInfos.get(i);
            bI.startingSplit = inputSplits.length - 1;
            i++;
        }
    }
    // Reset the read cursor to the start of the current block.
    currentReadBlockStartRow = 0;
    return r;
}
Also used : FileSplit(org.apache.hadoop.mapred.FileSplit) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 98 with InputSplit

use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class HiveIndexedInputFormat, method doGetSplits:

/**
 * Computes input splits by delegating to the per-partition input format of
 * each configured input directory and wrapping every resulting split as a
 * {@link HiveInputSplit} tagged with its originating format class.
 *
 * @param job the job configuration holding the input paths
 * @param numSplits desired total split count, divided evenly across dirs
 * @return the wrapped splits for all input directories
 * @throws IOException if no input paths are configured
 */
public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException {
    super.init(job);
    Path[] dirs = FileInputFormat.getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    JobConf newjob = new JobConf(job);
    ArrayList<InputSplit> result = new ArrayList<InputSplit>();
    // Split budget is shared evenly across all input directories.
    int splitsPerDir = numSplits / dirs.length;
    for (int d = 0; d < dirs.length; d++) {
        Path dir = dirs[d];
        PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(pathToPartitionInfo, dir, IOPrepareCache.get().allocatePartitionDescMap(), true);
        // Resolve (and cache) the partition's input format implementation.
        Class inputFormatClass = part.getInputFileFormatClass();
        InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
        Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);
        FileInputFormat.setInputPaths(newjob, dir);
        newjob.setInputFormat(inputFormat.getClass());
        for (InputSplit is : inputFormat.getSplits(newjob, splitsPerDir)) {
            result.add(new HiveInputSplit(is, inputFormatClass.getName()));
        }
    }
    return result.toArray(new HiveInputSplit[result.size()]);
}
Also used : Path(org.apache.hadoop.fs.Path) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) InputFormat(org.apache.hadoop.mapred.InputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) ArrayList(java.util.ArrayList) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) IOException(java.io.IOException) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 99 with InputSplit

use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class BucketizedHiveInputSplit, method write:

/**
 * Serializes this bucketized split: the concrete class name of the wrapped
 * splits, their count, each wrapped split's own payload, and finally the
 * input format class name.
 */
@Override
public void write(DataOutput out) throws IOException {
    assert (inputSplits != null && inputSplits.length > 0);
    // All wrapped splits share one concrete class; record it once up front.
    out.writeUTF(inputSplits[0].getClass().getName());
    final int splitCount = inputSplits.length;
    out.writeInt(splitCount);
    for (int i = 0; i < splitCount; i++) {
        inputSplits[i].write(out);
    }
    out.writeUTF(inputFormatClassName);
}
Also used : HiveInputSplit(org.apache.hadoop.hive.ql.io.HiveInputFormat.HiveInputSplit) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 100 with InputSplit

use of org.apache.hadoop.mapred.InputSplit in project hive by apache.

From the class NullRowsInputFormat, method getSplits:

/**
 * Produces one dummy split per configured input path; the requested split
 * count is ignored. The path is preserved on each split because
 * SplitGrouper needs it.
 */
@Override
public InputSplit[] getSplits(JobConf conf, int arg1) throws IOException {
    // It's important to read the correct nulls! (in truth, the path is needed for SplitGrouper).
    String[] paths = conf.getTrimmedStrings(FileInputFormat.INPUT_DIR, (String[]) null);
    if (paths == null) {
        throw new IOException("Cannot find path in conf");
    }
    InputSplit[] splits = new InputSplit[paths.length];
    int idx = 0;
    for (String path : paths) {
        splits[idx++] = new DummyInputSplit(path);
    }
    return splits;
}
Also used : IOException(java.io.IOException) InputSplit(org.apache.hadoop.mapred.InputSplit)

Aggregations

InputSplit (org.apache.hadoop.mapred.InputSplit)161 Path (org.apache.hadoop.fs.Path)57 JobConf (org.apache.hadoop.mapred.JobConf)56 Test (org.junit.Test)49 IOException (java.io.IOException)47 ArrayList (java.util.ArrayList)29 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)27 FileSplit (org.apache.hadoop.mapred.FileSplit)24 FileSystem (org.apache.hadoop.fs.FileSystem)21 TextInputFormat (org.apache.hadoop.mapred.TextInputFormat)21 InputFormat (org.apache.hadoop.mapred.InputFormat)19 RecordWriter (org.apache.hadoop.mapred.RecordWriter)19 NullWritable (org.apache.hadoop.io.NullWritable)18 Text (org.apache.hadoop.io.Text)18 Configuration (org.apache.hadoop.conf.Configuration)14 LongWritable (org.apache.hadoop.io.LongWritable)11 FileInputFormat (org.apache.hadoop.mapred.FileInputFormat)10 Properties (java.util.Properties)9 TaskLocationHint (org.apache.tez.dag.api.TaskLocationHint)9 HashMap (java.util.HashMap)8