
Example 26 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project mongo-hadoop by mongodb.

From class GridFSInputFormatTest, method testReadWholeFile.

@Test
public void testReadWholeFile() throws IOException, InterruptedException {
    Configuration conf = getConfiguration();
    MongoConfigUtil.setGridFSWholeFileSplit(conf, true);
    JobContext jobContext = mockJobContext(conf);
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    // Split the file's contents into sections on runs of '#' characters.
    MongoConfigUtil.setGridFSDelimiterPattern(conf, "#+");
    TaskAttemptContext context = mockTaskAttemptContext(conf);
    assertEquals(1, splits.size());
    List<String> sections = new ArrayList<String>();
    for (InputSplit split : splits) {
        GridFSInputFormat.GridFSTextRecordReader reader = new GridFSInputFormat.GridFSTextRecordReader();
        reader.initialize(split, context);
        // Read every section the record reader produces for this split.
        while (reader.nextKeyValue()) {
            sections.add(reader.getCurrentValue().toString());
        }
    }
    assertEquals(Arrays.asList(readmeSections), sections);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JobContext(org.apache.hadoop.mapreduce.JobContext) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test) BaseHadoopTest(com.mongodb.hadoop.testutils.BaseHadoopTest)
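
The snippet above calls mockJobContext(conf) and mockTaskAttemptContext(conf), helpers defined elsewhere in GridFSInputFormatTest and not shown in this listing. A minimal sketch of what such Mockito-based helpers might look like, assuming the format and record reader only consult the Configuration:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical stand-ins for the test's private helpers (sketch only).
private static JobContext mockJobContext(final Configuration conf) {
    JobContext context = mock(JobContext.class);
    // getSplits() reads its settings from the Configuration, so that is all the mock returns.
    when(context.getConfiguration()).thenReturn(conf);
    return context;
}

private static TaskAttemptContext mockTaskAttemptContext(final Configuration conf) {
    TaskAttemptContext context = mock(TaskAttemptContext.class);
    when(context.getConfiguration()).thenReturn(conf);
    return context;
}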

Example 27 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project mongo-hadoop by mongodb.

From class GridFSInputFormatTest, method getSplits.

private List<InputSplit> getSplits() throws IOException, InterruptedException {
    JobContext context = mock(JobContext.class);
    when(context.getConfiguration()).thenReturn(getConfiguration());
    return inputFormat.getSplits(context);
}
Also used : JobContext(org.apache.hadoop.mapreduce.JobContext)
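
Note that mock(...) and when(...) in this helper come from Mockito's static imports, which the "Also used" line above omits; the assumed imports are:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;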

Example 28 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project flink by apache.

From class HCatInputFormatBase, method createInputSplits.

@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    List<InputSplit> splits;
    try {
        splits = this.hCatInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) HadoopInputSplit(org.apache.flink.api.java.hadoop.mapreduce.wrapper.HadoopInputSplit) JobContext(org.apache.hadoop.mapreduce.JobContext) IOException(java.io.IOException) InputSplit(org.apache.hadoop.mapreduce.InputSplit) HadoopInputSplit(org.apache.flink.api.java.hadoop.mapreduce.wrapper.HadoopInputSplit) JobID(org.apache.hadoop.mapreduce.JobID)
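
Flink builds a concrete JobContextImpl rather than mocking one; the same pattern drives split calculation for any Hadoop mapreduce InputFormat. A minimal sketch under assumed inputs (TextInputFormat and a hypothetical local directory named "input"):

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

public class SplitCalculationSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // "input" is a placeholder path used only for this sketch.
        FileInputFormat.addInputPath(job, new Path("input"));
        // A synthetic JobID is sufficient; nothing is submitted to a cluster.
        JobContext jobContext = new JobContextImpl(job.getConfiguration(), new JobID());
        List<InputSplit> splits = new TextInputFormat().getSplits(jobContext);
        System.out.println("computed " + splits.size() + " splits");
    }
}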

Example 29 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project flink by apache.

From class HadoopOutputFormatBase, method finalizeGlobal.

@Override
public void finalizeGlobal(int parallelism) throws IOException {
    JobContext jobContext;
    TaskAttemptContext taskContext;
    try {
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_" + String.format("%" + (6 - Integer.toString(1).length()) + "s", " ").replace(" ", "0") + Integer.toString(1) + "_0");
        jobContext = new JobContextImpl(this.configuration, new JobID());
        taskContext = new TaskAttemptContextImpl(this.configuration, taskAttemptID);
        this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(taskContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }
    // finalize HDFS output format
    if (this.outputCommitter != null) {
        this.outputCommitter.commitJob(jobContext);
    }
}
Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JobContext(org.apache.hadoop.mapreduce.JobContext) JobID(org.apache.hadoop.mapreduce.JobID) IOException(java.io.IOException) Credentials(org.apache.hadoop.security.Credentials)
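
The format-string gymnastics above zero-pad a task number into the literal id "attempt__0000_r_000001_0", which TaskAttemptID.forName then parses. If the goal is only to obtain such a synthetic id, the TaskAttemptID constructor builds the equivalent value directly; a sketch, not the Flink code itself:

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

// Equivalent synthetic attempt id: empty tracker identifier, job 0, reduce task 1, attempt 0.
TaskAttemptID taskAttemptID = new TaskAttemptID("", 0, TaskType.REDUCE, 1, 0);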

Example 30 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project hbase by apache.

From class TestMultiTableInputFormatBase, method testMRSplitsConnectionCount.

/**
 * Test that getSplits opens only one Connection.
 * In the past it opened many Connections; each Connection comes with a fresh cache,
 * which forces a new hit on hbase:meta. getSplits should open a single Connection
 * even for a MultiTableInputFormat.
 * @throws IOException
 */
@Test
public void testMRSplitsConnectionCount() throws IOException {
    // Make instance of MTIFB.
    MultiTableInputFormatBase mtif = new MultiTableInputFormatBase() {

        @Override
        public RecordReader<ImmutableBytesWritable, Result> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            return super.createRecordReader(split, context);
        }
    };
    // Pass it a mocked JobContext and make the mock return our Configuration.
    // The Configuration names our special Connection class, so the format gets
    // canned responses instead of talking to a real cluster.
    JobContext mockedJobContext = Mockito.mock(JobContext.class);
    Configuration c = HBaseConfiguration.create();
    c.set(ConnectionUtils.HBASE_CLIENT_CONNECTION_IMPL, MRSplitsConnection.class.getName());
    Mockito.when(mockedJobContext.getConfiguration()).thenReturn(c);
    // Invent a bunch of scans, each against a different table, for a good spread.
    List<Scan> scans = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        Scan scan = new Scan();
        String tableName = this.name.getMethodName() + i;
        scan.setAttribute(SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
        scans.add(scan);
    }
    mtif.setScans(scans);
    // Get splits. Assert that there is at least one.
    List<InputSplit> splits = mtif.getSplits(mockedJobContext);
    Assert.assertTrue(splits.size() > 0);
    // Assert that only one Connection was made (see the static counter incremented
    // in the mocked MRSplitsConnection's constructor).
    Assert.assertEquals(1, MRSplitsConnection.creations.get());
}
Also used : ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) ArrayList(java.util.ArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Result(org.apache.hadoop.hbase.client.Result) Scan(org.apache.hadoop.hbase.client.Scan) JobContext(org.apache.hadoop.mapreduce.JobContext) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
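
MRSplitsConnection is a test-only Connection implementation (not shown in this listing) that the Configuration injects via ConnectionUtils.HBASE_CLIENT_CONNECTION_IMPL; the final assertion relies on it bumping a static counter from its constructor. A hypothetical illustration of just that counting idiom, not the actual HBase class:

import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;

// Sketch of the counting idiom; the real MRSplitsConnection also implements the
// HBase Connection interface and serves canned region locations.
public class CountingConnectionStub {
    static final AtomicInteger creations = new AtomicInteger(0);

    public CountingConnectionStub(Configuration conf) {
        // Every construction bumps the shared counter, so the test can assert
        // exactly how many connections were opened.
        creations.incrementAndGet();
    }
}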

Aggregations

JobContext (org.apache.hadoop.mapreduce.JobContext): 85 uses
Configuration (org.apache.hadoop.conf.Configuration): 41 uses
Job (org.apache.hadoop.mapreduce.Job): 35 uses
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 34 uses
Test (org.junit.Test): 31 uses
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 29 uses
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 28 uses
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 25 uses
Path (org.apache.hadoop.fs.Path): 24 uses
IOException (java.io.IOException): 22 uses
File (java.io.File): 19 uses
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 16 uses
ArrayList (java.util.ArrayList): 13 uses
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 11 uses
JobConf (org.apache.hadoop.mapred.JobConf): 10 uses
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 10 uses
LongWritable (org.apache.hadoop.io.LongWritable): 9 uses
MapFile (org.apache.hadoop.io.MapFile): 9 uses
JobID (org.apache.hadoop.mapreduce.JobID): 7 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 6 uses