Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project jena by apache.
From class AbstractNodeTupleInputFormatTests, method testMultipleInputs:
/**
 * Runs a multiple-input test.
 *
 * @param inputs
 *            Input files
 * @param expectedSplits
 *            Number of splits expected
 * @param expectedTuples
 *            Number of tuples expected
 * @throws IOException
 *             If an I/O error occurs while reading the inputs
 * @throws InterruptedException
 *             If the read is interrupted
 */
protected final void testMultipleInputs(File[] inputs, int expectedSplits, int expectedTuples)
        throws IOException, InterruptedException {
    // Prepare configuration and inputs
    Configuration config = this.prepareConfiguration();
    // Set up fake job
    InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
    Job job = Job.getInstance(config);
    job.setInputFormatClass(inputFormat.getClass());
    for (File input : inputs) {
        this.addInputPath(input, job.getConfiguration(), job);
    }
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length);
    NLineInputFormat.setNumLinesPerSplit(job, expectedTuples);
    // Check splits
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(expectedSplits, splits.size());
    // Check tuples
    int count = 0;
    for (InputSplit split : splits) {
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        count += this.countTuples(reader);
    }
    Assert.assertEquals(expectedTuples, count);
}
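
The helpers prepareConfiguration(), getInputFormat(), addInputPath(), and countTuples() are members that concrete test subclasses supply. A minimal sketch of what countTuples() might look like, assuming it simply exhausts the reader (the real Jena implementation may additionally validate each tuple):

protected int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException {
    int count = 0;
    // Advance through every key/value pair the reader produces
    while (reader.nextKeyValue()) {
        count++;
    }
    reader.close();
    return count;
}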
Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project cdap by caskdata.
From class StreamInputFormatTest, method testFormatStreamRecordReader:
@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());
    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
                                                           Files.newOutputStreamSupplier(indexFile), 100L);
    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"),
                                              Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();
    FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(),
                                                             Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
                                                             Collections.<String, String>emptyMap());
    Configuration conf = new Configuration();
    AbstractStreamInputFormat.setStreamId(conf, DUMMY_ID);
    AbstractStreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    AbstractStreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    AbstractStreamInputFormat format = new AbstractStreamInputFormat() {
        @Override
        public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
            return new NoOpAuthorizer();
        }

        @Override
        public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
            return new AuthenticationTestContext();
        }
    };
    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format.createRecordReader(split, context);
        recordReader.initialize(split, context);
        while (recordReader.nextKeyValue()) {
            recordsRead.add(recordReader.getCurrentValue());
        }
    }
    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}
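
Both tests exercise an InputFormat outside a running MapReduce job by wrapping a Configuration in a TaskAttemptContextImpl with a fresh TaskAttemptID, then driving each split's RecordReader by hand. A condensed sketch of that pattern using stock Hadoop classes (TextInputFormat and the /tmp/input path are illustrative placeholders, not taken from either project):

Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
FileInputFormat.addInputPath(job, new Path("/tmp/input")); // hypothetical input directory
TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
TextInputFormat format = new TextInputFormat();
for (InputSplit split : format.getSplits(job)) {
    RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
    reader.initialize(split, context);
    while (reader.nextKeyValue()) {
        // consume reader.getCurrentKey() / reader.getCurrentValue()
    }
    reader.close();
}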