Search in sources :

Example 6 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestMRInputSplitDistributor method testSerializedPayload.

/**
 * Runs MRInputSplitDistributor with MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD
 * set to true and checks the three events it returns: an InputUpdatePayloadEvent
 * followed by two InputDataInformationEvents that carry only a serialized payload,
 * each of which parses back into an InputSplitForTest with the expected identifier.
 */
@Test(timeout = 5000)
public void testSerializedPayload() throws IOException {
    // Initializer configuration asking for serialized event payloads.
    Configuration initializerConf = new Configuration(false);
    initializerConf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, true);
    ByteString serializedConf = TezUtils.createByteStringFromConf(initializerConf);

    // Two test splits, told apart by their identifiers (1 and 2).
    InputSplit firstSplit = new InputSplitForTest(1);
    InputSplit secondSplit = new InputSplitForTest(2);
    MRSplitProto firstSplitProto = MRInputHelpers.createSplitProto(firstSplit);
    MRSplitProto secondSplitProto = MRInputHelpers.createSplitProto(secondSplit);

    // Bundle the splits and the configuration into the initializer's user payload.
    MRSplitsProto.Builder allSplits = MRSplitsProto.newBuilder()
            .addSplits(firstSplitProto)
            .addSplits(secondSplitProto);
    MRInputUserPayloadProto.Builder payloadBuilder = MRInputUserPayloadProto.newBuilder()
            .setSplits(allSplits.build())
            .setConfigurationBytes(serializedConf);
    UserPayload initializerPayload =
            UserPayload.create(payloadBuilder.build().toByteString().asReadOnlyByteBuffer());

    InputInitializerContext initializerContext =
            new TezTestUtils.TezRootInputInitializerContextForTest(initializerPayload);
    MRInputSplitDistributor distributor = new MRInputSplitDistributor(initializerContext);
    List<Event> emitted = distributor.initialize();

    // Expect one payload-update event followed by one data-information event per split.
    assertEquals(3, emitted.size());
    assertTrue(emitted.get(0) instanceof InputUpdatePayloadEvent);
    assertTrue(emitted.get(1) instanceof InputDataInformationEvent);
    assertTrue(emitted.get(2) instanceof InputDataInformationEvent);
    InputDataInformationEvent firstInfo = (InputDataInformationEvent) emitted.get(1);
    InputDataInformationEvent secondInfo = (InputDataInformationEvent) emitted.get(2);

    // Only the serialized form of the payload should be populated.
    assertNull(firstInfo.getDeserializedUserPayload());
    assertNull(secondInfo.getDeserializedUserPayload());
    assertNotNull(firstInfo.getUserPayload());
    assertNotNull(secondInfo.getUserPayload());

    // Parse each payload back into a split and check its identifier.
    ByteString firstBytes = ByteString.copyFrom(firstInfo.getUserPayload());
    MRSplitProto firstParsed = MRSplitProto.parseFrom(firstBytes);
    InputSplit firstRestored = MRInputUtils.getOldSplitDetailsFromEvent(firstParsed, new Configuration());
    assertTrue(firstRestored instanceof InputSplitForTest);
    assertEquals(1, ((InputSplitForTest) firstRestored).identifier);

    ByteString secondBytes = ByteString.copyFrom(secondInfo.getUserPayload());
    MRSplitProto secondParsed = MRSplitProto.parseFrom(secondBytes);
    InputSplit secondRestored = MRInputUtils.getOldSplitDetailsFromEvent(secondParsed, new Configuration());
    assertTrue(secondRestored instanceof InputSplitForTest);
    assertEquals(2, ((InputSplitForTest) secondRestored).identifier);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) UserPayload(org.apache.tez.dag.api.UserPayload) ByteString(com.google.protobuf.ByteString) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) MRSplitsProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) Test(org.junit.Test)

Example 7 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestMRInputSplitDistributor method testDeserializedPayload.

/**
 * Runs MRInputSplitDistributor with MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD
 * set to false and checks the three events it returns: an InputUpdatePayloadEvent
 * followed by two InputDataInformationEvents whose serialized payload is null and
 * whose deserialized payload is an InputSplitForTest with the expected identifier.
 */
@Test(timeout = 5000)
public void testDeserializedPayload() throws IOException {
    // Initializer configuration asking for non-serialized (object) event payloads.
    Configuration initializerConf = new Configuration(false);
    initializerConf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, false);
    ByteString serializedConf = TezUtils.createByteStringFromConf(initializerConf);

    // Two test splits, told apart by their identifiers (1 and 2).
    InputSplit firstSplit = new InputSplitForTest(1);
    InputSplit secondSplit = new InputSplitForTest(2);
    MRSplitProto firstSplitProto = MRInputHelpers.createSplitProto(firstSplit);
    MRSplitProto secondSplitProto = MRInputHelpers.createSplitProto(secondSplit);

    // Bundle the splits and the configuration into the initializer's user payload.
    MRSplitsProto.Builder allSplits = MRSplitsProto.newBuilder()
            .addSplits(firstSplitProto)
            .addSplits(secondSplitProto);
    MRInputUserPayloadProto.Builder payloadBuilder = MRInputUserPayloadProto.newBuilder()
            .setSplits(allSplits.build())
            .setConfigurationBytes(serializedConf);
    UserPayload initializerPayload =
            UserPayload.create(payloadBuilder.build().toByteString().asReadOnlyByteBuffer());

    InputInitializerContext initializerContext =
            new TezTestUtils.TezRootInputInitializerContextForTest(initializerPayload);
    MRInputSplitDistributor distributor = new MRInputSplitDistributor(initializerContext);
    List<Event> emitted = distributor.initialize();

    // Expect one payload-update event followed by one data-information event per split.
    assertEquals(3, emitted.size());
    assertTrue(emitted.get(0) instanceof InputUpdatePayloadEvent);
    assertTrue(emitted.get(1) instanceof InputDataInformationEvent);
    assertTrue(emitted.get(2) instanceof InputDataInformationEvent);
    InputDataInformationEvent firstInfo = (InputDataInformationEvent) emitted.get(1);
    InputDataInformationEvent secondInfo = (InputDataInformationEvent) emitted.get(2);

    // Only the deserialized form of the payload should be populated.
    assertNull(firstInfo.getUserPayload());
    assertNull(secondInfo.getUserPayload());
    assertNotNull(firstInfo.getDeserializedUserPayload());
    assertNotNull(secondInfo.getDeserializedUserPayload());

    // The payload objects are the test splits themselves, in order.
    Object firstPayload = firstInfo.getDeserializedUserPayload();
    assertTrue(firstPayload instanceof InputSplitForTest);
    assertEquals(1, ((InputSplitForTest) firstPayload).identifier);

    Object secondPayload = secondInfo.getDeserializedUserPayload();
    assertTrue(secondPayload instanceof InputSplitForTest);
    assertEquals(2, ((InputSplitForTest) secondPayload).identifier);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) UserPayload(org.apache.tez.dag.api.UserPayload) ByteString(com.google.protobuf.ByteString) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) MRSplitsProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) Test(org.junit.Test)

Example 8 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestMultiMRInput method testSingleSplit.

/**
 * Writes one split's worth of sequence-file data, wraps the resulting old-API split
 * in a serialized InputDataInformationEvent, hands it to a MultiMRInput constructed
 * with a count of 1, and delegates verification of the readers to assertReaders.
 */
@Test(timeout = 5000)
public void testSingleSplit() throws Exception {
    // Job configuration pointing the old-API sequence-file format at the work dir.
    Path inputDir = new Path(TEST_ROOT_DIR, "testSingleSplit");
    JobConf conf = new JobConf(defaultConf);
    conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(conf, inputDir);

    // The input is created and initialized before any data or events exist.
    MultiMRInput multiInput = new MultiMRInput(createTezInputContext(conf), 1);
    multiInput.initialize();

    // Generate the data; createSplits also receives the length accumulator.
    AtomicLong totalLength = new AtomicLong();
    LinkedHashMap<LongWritable, Text> expectedRecords = createSplits(1, inputDir, conf, totalLength);

    SequenceFileInputFormat<LongWritable, Text> inputFormat = new SequenceFileInputFormat<>();
    InputSplit[] computedSplits = inputFormat.getSplits(conf, 1);
    assertEquals(1, computedSplits.length);

    // Deliver the single split to the input as a serialized event.
    MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(computedSplits[0]);
    InputDataInformationEvent splitEvent = InputDataInformationEvent.createWithSerializedPayload(
            0, serializedSplit.toByteString().asReadOnlyByteBuffer());
    List<Event> incoming = new ArrayList<>();
    incoming.add(splitEvent);
    multiInput.handleEvents(incoming);

    assertReaders(multiInput, expectedRecords, 1, totalLength.get());
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFileInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat) InputContext(org.apache.tez.runtime.api.InputContext) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) AtomicLong(java.util.concurrent.atomic.AtomicLong) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) Test(org.junit.Test)

Example 9 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestMultiMRInput method testMultipleSplits.

/**
 * Writes two splits' worth of sequence-file data, wraps each resulting old-API split
 * in its own serialized InputDataInformationEvent, hands both to a MultiMRInput
 * constructed with a count of 2, and delegates verification of the readers to
 * assertReaders.
 */
@Test(timeout = 5000)
public void testMultipleSplits() throws Exception {
    // Job configuration pointing the old-API sequence-file format at the work dir.
    Path inputDir = new Path(TEST_ROOT_DIR, "testMultipleSplits");
    JobConf conf = new JobConf(defaultConf);
    conf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(conf, inputDir);

    // The input is created and initialized before any data or events exist.
    MultiMRInput multiInput = new MultiMRInput(createTezInputContext(conf), 2);
    multiInput.initialize();

    // Generate the data; createSplits also receives the length accumulator.
    AtomicLong totalLength = new AtomicLong();
    LinkedHashMap<LongWritable, Text> expectedRecords = createSplits(2, inputDir, conf, totalLength);

    SequenceFileInputFormat<LongWritable, Text> inputFormat = new SequenceFileInputFormat<>();
    InputSplit[] computedSplits = inputFormat.getSplits(conf, 2);
    assertEquals(2, computedSplits.length);

    // One serialized event per split.
    // NOTE(review): both events are created with index 0, exactly as before —
    // confirm whether MultiMRInput pays attention to that index.
    MRSplitProto firstSerialized = MRInputHelpers.createSplitProto(computedSplits[0]);
    InputDataInformationEvent firstEvent = InputDataInformationEvent.createWithSerializedPayload(
            0, firstSerialized.toByteString().asReadOnlyByteBuffer());
    MRSplitProto secondSerialized = MRInputHelpers.createSplitProto(computedSplits[1]);
    InputDataInformationEvent secondEvent = InputDataInformationEvent.createWithSerializedPayload(
            0, secondSerialized.toByteString().asReadOnlyByteBuffer());

    List<Event> incoming = new ArrayList<>();
    incoming.add(firstEvent);
    incoming.add(secondEvent);
    multiInput.handleEvents(incoming);

    assertReaders(multiInput, expectedRecords, 2, totalLength.get());
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFileInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat) InputContext(org.apache.tez.runtime.api.InputContext) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) AtomicLong(java.util.concurrent.atomic.AtomicLong) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) Test(org.junit.Test)

Example 10 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestMultiMRInput method testNewFormatSplits.

/**
 * Same flow as the single-split case, but with the new (org.apache.hadoop.mapreduce)
 * API: the split comes from the new-API SequenceFileInputFormat, is serialized with a
 * SerializationFactory, and is delivered to a MultiMRInput whose configuration has
 * "mapred.mapper.new-api" switched on. Reader verification is delegated to
 * assertReaders.
 */
@Test
public void testNewFormatSplits() throws Exception {
    // Job set up for the new-API sequence-file input format.
    Path inputDir = new Path(TEST_ROOT_DIR, "testNewFormatSplits");
    Job newApiJob = Job.getInstance(defaultConf);
    newApiJob.setInputFormatClass(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.class);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(newApiJob, inputDir);
    Configuration jobConfiguration = newApiJob.getConfiguration();
    jobConfiguration.setBoolean("mapred.mapper.new-api", true);

    // Write the sequence file; createSplits also receives the length accumulator.
    AtomicLong totalLength = new AtomicLong();
    LinkedHashMap<LongWritable, Text> expectedRecords =
            createSplits(1, inputDir, jobConfiguration, totalLength);

    // Ask the new-API format for the splits; exactly one is expected.
    org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<LongWritable, Text> inputFormat =
            new org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat<>();
    List<org.apache.hadoop.mapreduce.InputSplit> computedSplits = inputFormat.getSplits(newApiJob);
    assertEquals(1, computedSplits.size());

    // Wrap the split in a serialized event.
    SerializationFactory serializers = new SerializationFactory(jobConfiguration);
    MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(computedSplits.get(0), serializers);
    InputDataInformationEvent splitEvent = InputDataInformationEvent.createWithSerializedPayload(
            0, serializedSplit.toByteString().asReadOnlyByteBuffer());

    // Build the input only now, then initialize it and feed it the event.
    InputContext tezContext = createTezInputContext(jobConfiguration);
    MultiMRInput multiInput = new MultiMRInput(tezContext, 1);
    multiInput.initialize();
    multiInput.handleEvents(Collections.<Event>singletonList(splitEvent));

    assertReaders(multiInput, expectedRecords, 1, totalLength.get());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) InputContext(org.apache.tez.runtime.api.InputContext) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) Text(org.apache.hadoop.io.Text) AtomicLong(java.util.concurrent.atomic.AtomicLong) LongWritable(org.apache.hadoop.io.LongWritable) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Aggregations

InputDataInformationEvent (org.apache.tez.runtime.api.events.InputDataInformationEvent)22 Event (org.apache.tez.runtime.api.Event)16 MRSplitProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto)10 Test (org.junit.Test)10 Configuration (org.apache.hadoop.conf.Configuration)8 InputSplit (org.apache.hadoop.mapred.InputSplit)7 JobConf (org.apache.hadoop.mapred.JobConf)5 InputContext (org.apache.tez.runtime.api.InputContext)5 InputConfigureVertexTasksEvent (org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent)5 LinkedList (java.util.LinkedList)4 AtomicLong (java.util.concurrent.atomic.AtomicLong)4 Path (org.apache.hadoop.fs.Path)4 LongWritable (org.apache.hadoop.io.LongWritable)4 Text (org.apache.hadoop.io.Text)4 SequenceFileInputFormat (org.apache.hadoop.mapred.SequenceFileInputFormat)4 UserPayload (org.apache.tez.dag.api.UserPayload)4 MRInputUserPayloadProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto)4 MRSplitsProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto)4 ByteString (com.google.protobuf.ByteString)3 ArrayList (java.util.ArrayList)3