use of org.apache.tez.runtime.api.InputContext in project tez by apache.
the class TestMultiMRInput method testMultipleSplits.
/**
 * Hands two old-API (mapred) sequence-file splits to a {@link MultiMRInput}, one event per
 * split, and then checks the resulting readers through {@code assertReaders}.
 */
@Test(timeout = 5000)
public void testMultipleSplits() throws Exception {
  final int splitCount = 2;
  Path inputDir = new Path(TEST_ROOT_DIR, "testMultipleSplits");

  JobConf mapredConf = new JobConf(defaultConf);
  mapredConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(mapredConf, inputDir);

  // Construct and initialize the input first; the createSplits call follows.
  MultiMRInput multiInput = new MultiMRInput(createTezInputContext(mapredConf), splitCount);
  multiInput.initialize();

  AtomicLong totalLength = new AtomicLong();
  LinkedHashMap<LongWritable, Text> expected =
      createSplits(splitCount, inputDir, mapredConf, totalLength);

  InputSplit[] computedSplits =
      new SequenceFileInputFormat<LongWritable, Text>().getSplits(mapredConf, splitCount);
  assertEquals(splitCount, computedSplits.length);

  // Wrap every split in its own event; each event is created with source index 0.
  List<Event> events = new ArrayList<Event>();
  for (InputSplit split : computedSplits) {
    MRSplitProto proto = MRInputHelpers.createSplitProto(split);
    events.add(InputDataInformationEvent.createWithSerializedPayload(
        0, proto.toByteString().asReadOnlyByteBuffer()));
  }
  multiInput.handleEvents(events);

  assertReaders(multiInput, expected, splitCount, totalLength.get());
}
use of org.apache.tez.runtime.api.InputContext in project tez by apache.
the class TestMultiMRInput method testNewFormatSplits.
/**
 * Exercises {@link MultiMRInput} with a single split obtained from the new-API (mapreduce)
 * sequence-file input format, then checks the readers through {@code assertReaders}.
 */
@Test
public void testNewFormatSplits() throws Exception {
  Path inputDir = new Path(TEST_ROOT_DIR, "testNewFormatSplits");

  // Configure a job that uses the new-API sequence-file input format.
  Job newApiJob = Job.getInstance(defaultConf);
  newApiJob.setInputFormatClass(
      org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.class);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(newApiJob, inputDir);
  Configuration jobConfig = newApiJob.getConfiguration();
  jobConfig.setBoolean("mapred.mapper.new-api", true);

  // Create the sequence file.
  AtomicLong totalLength = new AtomicLong();
  LinkedHashMap<LongWritable, Text> expected = createSplits(1, inputDir, jobConfig, totalLength);

  // Ask the input format for its splits; exactly one is expected.
  List<org.apache.hadoop.mapreduce.InputSplit> computedSplits =
      new org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<LongWritable, Text>()
          .getSplits(newApiJob);
  assertEquals(1, computedSplits.size());

  // Serialize the split into the payload of a single input-data event.
  MRSplitProto proto =
      MRInputHelpers.createSplitProto(computedSplits.get(0), new SerializationFactory(jobConfig));
  List<Event> events = Collections.<Event>singletonList(
      InputDataInformationEvent.createWithSerializedPayload(
          0, proto.toByteString().asReadOnlyByteBuffer()));

  // Build the MR input on a mocked context and deliver the event.
  MultiMRInput multiInput = new MultiMRInput(createTezInputContext(jobConfig), 1);
  multiInput.initialize();
  multiInput.handleEvents(events);

  assertReaders(multiInput, expected, 1, totalLength.get());
}
use of org.apache.tez.runtime.api.InputContext in project tez by apache.
the class TestMultiMRInput method createTezInputContext.
/**
 * Builds a Mockito mock of {@link InputContext} whose user payload is an
 * {@code MRInputUserPayloadProto} carrying the given configuration with grouping disabled.
 * The other stubbed getters return fixed values, except the unique identifier, which is a
 * random UUID string generated once per call of this method.
 *
 * @param conf configuration serialized into the user payload
 * @return the stubbed input context
 * @throws Exception if any call made while building the context throws
 */
private InputContext createTezInputContext(Configuration conf) throws Exception {
  // Payload: grouping disabled plus the serialized configuration.
  byte[] payloadBytes = MRInputUserPayloadProto.newBuilder()
      .setGroupingEnabled(false)
      .setConfigurationBytes(TezUtils.createByteStringFromConf(conf))
      .build()
      .toByteArray();

  InputContext context = mock(InputContext.class);

  // Application- and DAG-level values.
  doReturn(ApplicationId.newInstance(10000, 1)).when(context).getApplicationId();
  doReturn(new TezCounters()).when(context).getCounters();
  doReturn(1).when(context).getDAGAttemptNumber();
  doReturn("dagName").when(context).getDAGName();

  // Vertex- and task-level values.
  doReturn(1).when(context).getInputIndex();
  doReturn("srcVertexName").when(context).getSourceVertexName();
  doReturn(1).when(context).getTaskAttemptNumber();
  doReturn(1).when(context).getTaskIndex();
  doReturn(1).when(context).getTaskVertexIndex();
  doReturn("taskVertexName").when(context).getTaskVertexName();

  // Identifier and payload.
  String uniqueId = UUID.randomUUID().toString();
  doReturn(uniqueId).when(context).getUniqueIdentifier();
  doReturn(UserPayload.create(ByteBuffer.wrap(payloadBytes))).when(context).getUserPayload();

  return context;
}
use of org.apache.tez.runtime.api.InputContext in project tez by apache.
the class TestKVReadersWithMR method testWithSpecificNumberOfKV_MapReduce.
/**
 * Reads records through an {@code MRReaderMapReduce} whose record reader is a
 * {@code DummyRecordReaderMapReduce} constructed with {@code kvPairs}, and checks that:
 * <ul>
 *   <li>{@code notifyProgress()} has been invoked on the context once per record read,</li>
 *   <li>exactly {@code kvPairs} records are returned, and</li>
 *   <li>a further {@code next()} call throws an {@link IOException} with the usage hint.</li>
 * </ul>
 *
 * @param kvPairs value passed to the dummy record reader; also the expected record count
 * @throws IOException if {@code next()} throws while records are still being iterated
 */
public void testWithSpecificNumberOfKV_MapReduce(int kvPairs) throws IOException {
  InputContext mockContext = mock(InputContext.class);
  MRReaderMapReduce reader =
      new MRReaderMapReduce(conf, counters, inputRecordCounter, -1, 1, 10, 20, 30, mockContext);
  reader.recordReader = new DummyRecordReaderMapReduce(kvPairs);

  int records = 0;
  while (reader.next()) {
    records++;
    // Cumulative check: one notifyProgress() call per record read so far.
    verify(mockContext, times(records)).notifyProgress();
  }
  assertTrue("Expected " + kvPairs + " records but read " + records, kvPairs == records);

  // Reading again after the reader is exhausted should fail.
  try {
    reader.next();
    fail("Expected an IOException when calling next() after the last record");
  } catch (IOException e) {
    String message = e.getMessage();
    // Guard against a null message so the test reports a failure rather than an NPE.
    assertTrue("Unexpected exception message: " + message,
        message != null && message.contains("For usage, please refer to"));
  }
}
use of org.apache.tez.runtime.api.InputContext in project tez by apache.
the class TestKVReadersWithMR method testWithSpecificNumberOfKV.
/**
 * Reads records through an {@code MRReaderMapred} whose record reader is a
 * {@code DummyRecordReader} constructed with {@code kvPairs}, and checks that:
 * <ul>
 *   <li>{@code notifyProgress()} has been invoked on the context once per record read,</li>
 *   <li>exactly {@code kvPairs} records are returned, and</li>
 *   <li>a further {@code next()} call throws an {@link IOException} with the usage hint.</li>
 * </ul>
 *
 * @param kvPairs value passed to the dummy record reader; also the expected record count
 * @throws IOException if {@code next()} throws while records are still being iterated
 */
public void testWithSpecificNumberOfKV(int kvPairs) throws IOException {
  InputContext mockContext = mock(InputContext.class);
  MRReaderMapred reader = new MRReaderMapred(conf, counters, inputRecordCounter, mockContext);
  reader.recordReader = new DummyRecordReader(kvPairs);

  int records = 0;
  while (reader.next()) {
    records++;
    // Cumulative check: one notifyProgress() call per record read so far.
    verify(mockContext, times(records)).notifyProgress();
  }
  assertTrue("Expected " + kvPairs + " records but read " + records, kvPairs == records);

  // Reading again after the reader is exhausted should fail.
  try {
    reader.next();
    fail("Expected an IOException when calling next() after the last record");
  } catch (IOException e) {
    String message = e.getMessage();
    // Guard against a null message so the test reports a failure rather than an NPE.
    assertTrue("Unexpected exception message: " + message,
        message != null && message.contains("For usage, please refer to"));
  }
}
Aggregations