Use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by Apache.
From the class TestMRInputSplitDistributor, method testSerializedPayload.
/**
 * Runs {@code MRInputSplitDistributor} with
 * {@code MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD} set to {@code true} and checks
 * that it emits one {@code InputUpdatePayloadEvent} followed by two
 * {@code InputDataInformationEvent}s that carry only a serialized payload, each of which
 * parses back into the split it was built from.
 *
 * @throws IOException propagated from the initializer or from protobuf parsing
 */
@Test(timeout = 5000)
public void testSerializedPayload() throws IOException {
  // Configuration embedded in the user payload: request serialized event payloads.
  Configuration initializerConf = new Configuration(false);
  initializerConf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, true);
  ByteString serializedConf = TezUtils.createByteStringFromConf(initializerConf);

  // Two splits distinguishable by their identifier (1 and 2).
  InputSplit firstSplit = new InputSplitForTest(1);
  InputSplit secondSplit = new InputSplitForTest(2);
  MRSplitsProto allSplits = MRSplitsProto.newBuilder()
      .addSplits(MRInputHelpers.createSplitProto(firstSplit))
      .addSplits(MRInputHelpers.createSplitProto(secondSplit))
      .build();

  MRInputUserPayloadProto payloadMessage = MRInputUserPayloadProto.newBuilder()
      .setSplits(allSplits)
      .setConfigurationBytes(serializedConf)
      .build();
  UserPayload userPayload =
      UserPayload.create(payloadMessage.toByteString().asReadOnlyByteBuffer());

  InputInitializerContext initContext =
      new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
  MRInputSplitDistributor distributor = new MRInputSplitDistributor(initContext);
  List<Event> emitted = distributor.initialize();

  // Expected layout: [payload update, data event for split 1, data event for split 2].
  assertEquals(3, emitted.size());
  assertTrue(emitted.get(0) instanceof InputUpdatePayloadEvent);
  assertTrue(emitted.get(1) instanceof InputDataInformationEvent);
  assertTrue(emitted.get(2) instanceof InputDataInformationEvent);

  // The event at list position i is expected to carry the split whose identifier is i.
  for (int expectedId = 1; expectedId <= 2; expectedId++) {
    InputDataInformationEvent dataEvent = (InputDataInformationEvent) emitted.get(expectedId);

    // Serialized mode: only the raw payload is populated.
    assertNull(dataEvent.getDeserializedUserPayload());
    assertNotNull(dataEvent.getUserPayload());

    // Round-trip the raw payload back into a split and check it is the one we put in.
    MRSplitProto parsedProto =
        MRSplitProto.parseFrom(ByteString.copyFrom(dataEvent.getUserPayload()));
    InputSplit restored =
        MRInputUtils.getOldSplitDetailsFromEvent(parsedProto, new Configuration());
    assertTrue(restored instanceof InputSplitForTest);
    assertEquals(expectedId, ((InputSplitForTest) restored).identifier);
  }
}
Use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by Apache.
From the class TestMRInputSplitDistributor, method testDeserializedPayload.
/**
 * Runs {@code MRInputSplitDistributor} with
 * {@code MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD} set to {@code false} and checks
 * that it emits one {@code InputUpdatePayloadEvent} followed by two
 * {@code InputDataInformationEvent}s that carry no serialized payload but expose the split
 * directly as an {@code InputSplitForTest} with the expected identifier.
 *
 * @throws IOException propagated from the initializer
 */
@Test(timeout = 5000)
public void testDeserializedPayload() throws IOException {
  // Configuration embedded in the user payload: do NOT serialize event payloads.
  Configuration initializerConf = new Configuration(false);
  initializerConf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, false);
  ByteString serializedConf = TezUtils.createByteStringFromConf(initializerConf);

  // Two splits distinguishable by their identifier (1 and 2).
  InputSplit firstSplit = new InputSplitForTest(1);
  InputSplit secondSplit = new InputSplitForTest(2);
  MRSplitsProto allSplits = MRSplitsProto.newBuilder()
      .addSplits(MRInputHelpers.createSplitProto(firstSplit))
      .addSplits(MRInputHelpers.createSplitProto(secondSplit))
      .build();

  MRInputUserPayloadProto payloadMessage = MRInputUserPayloadProto.newBuilder()
      .setSplits(allSplits)
      .setConfigurationBytes(serializedConf)
      .build();
  UserPayload userPayload =
      UserPayload.create(payloadMessage.toByteString().asReadOnlyByteBuffer());

  InputInitializerContext initContext =
      new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
  MRInputSplitDistributor distributor = new MRInputSplitDistributor(initContext);
  List<Event> emitted = distributor.initialize();

  // Expected layout: [payload update, data event for split 1, data event for split 2].
  assertEquals(3, emitted.size());
  assertTrue(emitted.get(0) instanceof InputUpdatePayloadEvent);
  assertTrue(emitted.get(1) instanceof InputDataInformationEvent);
  assertTrue(emitted.get(2) instanceof InputDataInformationEvent);

  // The event at list position i is expected to carry the split whose identifier is i.
  for (int expectedId = 1; expectedId <= 2; expectedId++) {
    InputDataInformationEvent dataEvent = (InputDataInformationEvent) emitted.get(expectedId);

    // Deserialized mode: the raw payload is absent and the split object is attached instead.
    assertNull(dataEvent.getUserPayload());
    Object attachedSplit = dataEvent.getDeserializedUserPayload();
    assertNotNull(attachedSplit);
    assertTrue(attachedSplit instanceof InputSplitForTest);
    assertEquals(expectedId, ((InputSplitForTest) attachedSplit).identifier);
  }
}
Use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by Apache.
From the class TestMultiMRInput, method testSingleSplit.
/**
 * Feeds a single old-API ({@code org.apache.hadoop.mapred}) SequenceFile split to a
 * {@code MultiMRInput} through one serialized {@code InputDataInformationEvent}, then
 * delegates verification of the resulting readers to {@code assertReaders}.
 *
 * @throws Exception on any setup, I/O or event-handling failure
 */
@Test(timeout = 5000)
public void testSingleSplit() throws Exception {
  Path inputDir = new Path(TEST_ROOT_DIR, "testSingleSplit");

  JobConf conf = new JobConf(defaultConf);
  conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(conf, inputDir);

  // The input is constructed and initialized before any split data is produced.
  InputContext tezContext = createTezInputContext(conf);
  MultiMRInput multiInput = new MultiMRInput(tezContext, 1);
  multiInput.initialize();

  // createSplits also reports the total input length through the AtomicLong.
  AtomicLong totalLength = new AtomicLong();
  LinkedHashMap<LongWritable, Text> expectedRecords =
      createSplits(1, inputDir, conf, totalLength);

  SequenceFileInputFormat<LongWritable, Text> sequenceFormat = new SequenceFileInputFormat<>();
  InputSplit[] computedSplits = sequenceFormat.getSplits(conf, 1);
  assertEquals(1, computedSplits.length);

  // Wrap the only split in a serialized-payload event and hand it to the input.
  MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(computedSplits[0]);
  InputDataInformationEvent splitEvent = InputDataInformationEvent.createWithSerializedPayload(
      0, serializedSplit.toByteString().asReadOnlyByteBuffer());
  List<Event> pendingEvents = new ArrayList<>();
  pendingEvents.add(splitEvent);
  multiInput.handleEvents(pendingEvents);

  assertReaders(multiInput, expectedRecords, 1, totalLength.get());
}
Use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by Apache.
From the class TestMultiMRInput, method testMultipleSplits.
/**
 * Feeds two old-API ({@code org.apache.hadoop.mapred}) SequenceFile splits to a
 * {@code MultiMRInput} through two serialized {@code InputDataInformationEvent}s delivered
 * in a single {@code handleEvents} call, then delegates verification of the resulting
 * readers to {@code assertReaders}.
 *
 * @throws Exception on any setup, I/O or event-handling failure
 */
@Test(timeout = 5000)
public void testMultipleSplits() throws Exception {
  Path inputDir = new Path(TEST_ROOT_DIR, "testMultipleSplits");

  JobConf conf = new JobConf(defaultConf);
  conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(conf, inputDir);

  // The input is constructed and initialized before any split data is produced.
  InputContext tezContext = createTezInputContext(conf);
  MultiMRInput multiInput = new MultiMRInput(tezContext, 2);
  multiInput.initialize();

  // createSplits also reports the total input length through the AtomicLong.
  AtomicLong totalLength = new AtomicLong();
  LinkedHashMap<LongWritable, Text> expectedRecords =
      createSplits(2, inputDir, conf, totalLength);

  SequenceFileInputFormat<LongWritable, Text> sequenceFormat = new SequenceFileInputFormat<>();
  InputSplit[] computedSplits = sequenceFormat.getSplits(conf, 2);
  assertEquals(2, computedSplits.length);

  // One serialized-payload event per split, in split order.
  // NOTE(review): both events are created with 0 as the first argument (presumably the
  // source index) — confirm that sharing the value across events is intentional.
  List<Event> pendingEvents = new ArrayList<>();
  for (InputSplit split : computedSplits) {
    MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(split);
    pendingEvents.add(InputDataInformationEvent.createWithSerializedPayload(
        0, serializedSplit.toByteString().asReadOnlyByteBuffer()));
  }
  multiInput.handleEvents(pendingEvents);

  assertReaders(multiInput, expectedRecords, 2, totalLength.get());
}
Use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by Apache.
From the class TestMultiMRInput, method testNewFormatSplits.
/**
 * Feeds a single new-API ({@code org.apache.hadoop.mapreduce}) SequenceFile split to a
 * {@code MultiMRInput} through one serialized {@code InputDataInformationEvent}, then
 * delegates verification of the resulting readers to {@code assertReaders}.
 *
 * <p>Bounded by the same 5-second timeout as the other tests in this class, so a stuck
 * reader fails the test instead of hanging the build.
 *
 * @throws Exception on any setup, I/O or event-handling failure
 */
@Test(timeout = 5000)
public void testNewFormatSplits() throws Exception {
  Path workDir = new Path(TEST_ROOT_DIR, "testNewFormatSplits");
  Job job = Job.getInstance(defaultConf);
  job.setInputFormatClass(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.class);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(job, workDir);
  Configuration conf = job.getConfiguration();
  // Switch the job configuration to the new (mapreduce) mapper API.
  conf.setBoolean("mapred.mapper.new-api", true);

  // Create sequence file.
  AtomicLong inputLength = new AtomicLong();
  LinkedHashMap<LongWritable, Text> data = createSplits(1, workDir, conf, inputLength);

  // Get split information.
  org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<LongWritable, Text> format =
      new org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<>();
  List<org.apache.hadoop.mapreduce.InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size());

  // Create the event. New-API splits are serialized through a SerializationFactory.
  MRSplitProto splitProto =
      MRInputHelpers.createSplitProto(splits.get(0), new SerializationFactory(conf));
  InputDataInformationEvent event = InputDataInformationEvent.createWithSerializedPayload(
      0, splitProto.toByteString().asReadOnlyByteBuffer());

  // Create input context.
  InputContext inputContext = createTezInputContext(conf);

  // Create the MR input object and process the event.
  MultiMRInput input = new MultiMRInput(inputContext, 1);
  input.initialize();
  input.handleEvents(Collections.<Event>singletonList(event));

  assertReaders(input, data, 1, inputLength.get());
}
Aggregations