Use of org.apache.hadoop.mapreduce.JobContext in project mongo-hadoop by mongodb.
From the class GridFSInputFormatTest, method testReadWholeFile:
@Test
public void testReadWholeFile() throws IOException, InterruptedException {
    Configuration conf = getConfiguration();
    MongoConfigUtil.setGridFSWholeFileSplit(conf, true);
    JobContext jobContext = mockJobContext(conf);
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    // Splits were computed with no delimiter set (empty delimiter == no delimiter);
    // now set one for reading, so each run of '#' starts a new section.
    MongoConfigUtil.setGridFSDelimiterPattern(conf, "#+");
    TaskAttemptContext context = mockTaskAttemptContext(conf);
    assertEquals(1, splits.size());
    List<String> sections = new ArrayList<String>();
    for (InputSplit split : splits) {
        GridFSInputFormat.GridFSTextRecordReader reader = new GridFSInputFormat.GridFSTextRecordReader();
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            sections.add(reader.getCurrentValue().toString());
        }
    }
    assertEquals(Arrays.asList(readmeSections), sections);
}
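The helpers mockJobContext and mockTaskAttemptContext are not shown on this page. A minimal sketch of what they plausibly look like with Mockito, assuming each context only needs to hand back the Configuration (the getSplits helper below mocks JobContext the same way; the real helpers may stub more methods):

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical reconstructions of the test helpers.
private static JobContext mockJobContext(final Configuration conf) {
    JobContext context = mock(JobContext.class);
    when(context.getConfiguration()).thenReturn(conf);
    return context;
}

private static TaskAttemptContext mockTaskAttemptContext(final Configuration conf) {
    TaskAttemptContext context = mock(TaskAttemptContext.class);
    when(context.getConfiguration()).thenReturn(conf);
    return context;
}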
Use of org.apache.hadoop.mapreduce.JobContext in project mongo-hadoop by mongodb.
From the class GridFSInputFormatTest, method getSplits:
private List<InputSplit> getSplits() throws IOException, InterruptedException {
    JobContext context = mock(JobContext.class);
    when(context.getConfiguration()).thenReturn(getConfiguration());
    return inputFormat.getSplits(context);
}
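The same helper can be written without a mocking framework by using Hadoop's concrete JobContextImpl, as the Flink snippets below do; a sketch under that assumption:

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

private List<InputSplit> getSplits() throws IOException, InterruptedException {
    // A real JobContext backed by the test Configuration; no Mockito needed.
    JobContext context = new JobContextImpl(getConfiguration(), new JobID());
    return inputFormat.getSplits(context);
}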
Use of org.apache.hadoop.mapreduce.JobContext in project flink by apache.
From the class HCatInputFormatBase, method createInputSplits:
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    List<InputSplit> splits;
    try {
        splits = this.hCatInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }
    // Wrap each mapreduce InputSplit, together with the JobContext, so Flink can serialize it.
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
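The core pattern here, independent of HCatalog, is: build a JobContextImpl around a Configuration, ask any org.apache.hadoop.mapreduce.InputFormat for its splits, and translate the checked InterruptedException into an IOException. A minimal standalone sketch, assuming a TextInputFormat over some input path (splitsFor is an illustrative name, not part of either project):

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

static List<InputSplit> splitsFor(Configuration conf, String inputPath) throws IOException {
    try {
        // Register the input path on a throwaway Job, then hand its
        // Configuration to a JobContextImpl for split computation.
        Job job = Job.getInstance(conf);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        InputFormat<LongWritable, Text> format = new TextInputFormat();
        return format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
    } catch (InterruptedException e) {
        // Mirror the Flink snippet: surface interruption as an IOException.
        Thread.currentThread().interrupt();
        throw new IOException("Could not get splits.", e);
    }
}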
Use of org.apache.hadoop.mapreduce.JobContext in project flink by apache.
From the class HadoopOutputFormatBase, method finalizeGlobal:
@Override
public void finalizeGlobal(int parallelism) throws IOException {
    JobContext jobContext;
    TaskAttemptContext taskContext;
    try {
        // The padded concatenation below produces the ID "attempt__0000_r_000001_0":
        // a reduce attempt with the task id zero-padded to six digits.
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(1) + "_0");
        jobContext = new JobContextImpl(this.configuration, new JobID());
        taskContext = new TaskAttemptContextImpl(this.configuration, taskAttemptID);
        this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(taskContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }
    // Finalize the HDFS output format: let the committer commit the whole job.
    if (this.outputCommitter != null) {
        this.outputCommitter.commitJob(jobContext);
    }
}
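The padding expression on the TaskAttemptID line is hard to read. It is equivalent to a single zero-padded format string, which can be verified against TaskAttemptID.forName (the canonical form it parses is attempt_<jtIdentifier>_<jobId>_<r|m>_<taskId>_<attemptId>); a small sketch:

import org.apache.hadoop.mapreduce.TaskAttemptID;

// Equivalent to the concatenation above: reduce task id 1, zero-padded to six digits.
TaskAttemptID id = TaskAttemptID.forName(String.format("attempt__0000_r_%06d_0", 1));
// id.getTaskID().getId() == 1; the attempt number is 0.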
Use of org.apache.hadoop.mapreduce.JobContext in project hbase by apache.
From the class TestMultiTableInputFormatBase, method testMRSplitsConnectionCount:
/**
 * Test that getSplits only puts up one Connection.
 * In the past it has put up many Connections. Each Connection setup comes with a
 * fresh new cache, so each one forces a fresh hit on hbase:meta. getSplits should
 * make only one Connection, even for a MultiTableInputFormat.
 * @throws IOException
 */
@Test
public void testMRSplitsConnectionCount() throws IOException {
    // Make an instance of MTIFB (MultiTableInputFormatBase).
    MultiTableInputFormatBase mtif = new MultiTableInputFormatBase() {
        @Override
        public RecordReader<ImmutableBytesWritable, Result> createRecordReader(InputSplit split,
                TaskAttemptContext context) throws IOException, InterruptedException {
            return super.createRecordReader(split, context);
        }
    };
    // Pass it a mocked JobContext. Make the JobContext return our Configuration.
    // Load the Configuration so it returns our special Connection, so we can
    // interpose canned responses.
    JobContext mockedJobContext = Mockito.mock(JobContext.class);
    Configuration c = HBaseConfiguration.create();
    c.set(ConnectionUtils.HBASE_CLIENT_CONNECTION_IMPL, MRSplitsConnection.class.getName());
    Mockito.when(mockedJobContext.getConfiguration()).thenReturn(c);
    // Invent a bunch of scans. Have each Scan go against a different table for a good spread.
    List<Scan> scans = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        Scan scan = new Scan();
        String tableName = this.name.getMethodName() + i;
        scan.setAttribute(SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
        scans.add(scan);
    }
    mtif.setScans(scans);
    // Get splits. Assert that we got at least one.
    List<InputSplit> splits = mtif.getSplits(mockedJobContext);
    Assert.assertTrue(splits.size() > 0);
    // Assert only one Connection was made (see the static counter in the mocked
    // Connection MRSplitsConnection's constructor).
    Assert.assertEquals(1, MRSplitsConnection.creations.get());
}
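MRSplitsConnection itself is not shown here; the final assertion relies on it counting its own instantiations. A minimal sketch of that counting pattern, under the assumption that the real class implements HBase's Connection interface with canned responses and is instantiated reflectively via the configured connection-impl key (the reflective constructor signature may also take an ExecutorService and User):

import java.util.concurrent.atomic.AtomicInteger;

import org.apache.hadoop.conf.Configuration;

// Sketch only: the real test class also implements the full
// org.apache.hadoop.hbase.client.Connection interface.
public class MRSplitsConnection /* implements Connection */ {
    static final AtomicInteger creations = new AtomicInteger(0);
    private final Configuration configuration;

    // HBase creates the configured Connection impl reflectively, so every
    // connection opened during getSplits bumps this counter.
    public MRSplitsConnection(Configuration configuration) {
        this.configuration = configuration;
        creations.incrementAndGet();
    }
}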