Search in sources :

Example 1 with InputUpdatePayloadEvent

use of org.apache.tez.runtime.api.events.InputUpdatePayloadEvent in project tez by apache.

the class RootInputVertexManager method onRootVertexInitialized.

@Override
public void onRootVertexInitialized(String inputName, InputDescriptor inputDescriptor, List<Event> events) {
    List<InputDataInformationEvent> riEvents = Lists.newLinkedList();
    boolean dataInformationEventSeen = false;
    for (Event event : events) {
        if (event instanceof InputConfigureVertexTasksEvent) {
            // No tasks should have been started yet. Checked by initial state check.
            Preconditions.checkState(dataInformationEventSeen == false);
            Preconditions.checkState(getContext().getVertexNumTasks(getContext().getVertexName()) == -1, "Parallelism for the vertex should be set to -1 if the InputInitializer is setting parallelism" + ", VertexName: " + getContext().getVertexName());
            Preconditions.checkState(configuredInputName == null, "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager" + ", VertexName: " + getContext().getVertexName() + ", ConfiguredInput: " + configuredInputName + ", CurrentInput: " + inputName);
            configuredInputName = inputName;
            InputConfigureVertexTasksEvent cEvent = (InputConfigureVertexTasksEvent) event;
            Map<String, InputSpecUpdate> rootInputSpecUpdate = new HashMap<String, InputSpecUpdate>();
            rootInputSpecUpdate.put(inputName, cEvent.getInputSpecUpdate() == null ? InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate() : cEvent.getInputSpecUpdate());
            getContext().reconfigureVertex(rootInputSpecUpdate, cEvent.getLocationHint(), cEvent.getNumTasks());
        }
        if (event instanceof InputUpdatePayloadEvent) {
            // No tasks should have been started yet. Checked by initial state check.
            Preconditions.checkState(dataInformationEventSeen == false);
            inputDescriptor.setUserPayload(UserPayload.create(((InputUpdatePayloadEvent) event).getUserPayload()));
        } else if (event instanceof InputDataInformationEvent) {
            dataInformationEventSeen = true;
            // # Tasks should have been set by this point.
            Preconditions.checkState(getContext().getVertexNumTasks(getContext().getVertexName()) != 0);
            Preconditions.checkState(configuredInputName == null || configuredInputName.equals(inputName), "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager" + ", VertexName:" + getContext().getVertexName() + ", ConfiguredInput: " + configuredInputName + ", CurrentInput: " + inputName);
            configuredInputName = inputName;
            InputDataInformationEvent rEvent = (InputDataInformationEvent) event;
            // 1:1 routing
            rEvent.setTargetIndex(rEvent.getSourceIndex());
            riEvents.add(rEvent);
        }
    }
    getContext().addRootInputEvents(inputName, riEvents);
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputSpecUpdate(org.apache.tez.runtime.api.InputSpecUpdate) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 2 with InputUpdatePayloadEvent

use of org.apache.tez.runtime.api.events.InputUpdatePayloadEvent in project tez by apache.

the class TestMRInputSplitDistributor method testSerializedPayload.

@Test(timeout = 5000)
public void testSerializedPayload() throws IOException {
    Configuration conf = new Configuration(false);
    conf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, true);
    ByteString confByteString = TezUtils.createByteStringFromConf(conf);
    InputSplit split1 = new InputSplitForTest(1);
    InputSplit split2 = new InputSplitForTest(2);
    MRSplitProto proto1 = MRInputHelpers.createSplitProto(split1);
    MRSplitProto proto2 = MRInputHelpers.createSplitProto(split2);
    MRSplitsProto.Builder splitsProtoBuilder = MRSplitsProto.newBuilder();
    splitsProtoBuilder.addSplits(proto1);
    splitsProtoBuilder.addSplits(proto2);
    MRInputUserPayloadProto.Builder payloadProto = MRInputUserPayloadProto.newBuilder();
    payloadProto.setSplits(splitsProtoBuilder.build());
    payloadProto.setConfigurationBytes(confByteString);
    UserPayload userPayload = UserPayload.create(payloadProto.build().toByteString().asReadOnlyByteBuffer());
    InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
    MRInputSplitDistributor splitDist = new MRInputSplitDistributor(context);
    List<Event> events = splitDist.initialize();
    assertEquals(3, events.size());
    assertTrue(events.get(0) instanceof InputUpdatePayloadEvent);
    assertTrue(events.get(1) instanceof InputDataInformationEvent);
    assertTrue(events.get(2) instanceof InputDataInformationEvent);
    InputDataInformationEvent diEvent1 = (InputDataInformationEvent) (events.get(1));
    InputDataInformationEvent diEvent2 = (InputDataInformationEvent) (events.get(2));
    assertNull(diEvent1.getDeserializedUserPayload());
    assertNull(diEvent2.getDeserializedUserPayload());
    assertNotNull(diEvent1.getUserPayload());
    assertNotNull(diEvent2.getUserPayload());
    MRSplitProto event1Proto = MRSplitProto.parseFrom(ByteString.copyFrom(diEvent1.getUserPayload()));
    InputSplit is1 = MRInputUtils.getOldSplitDetailsFromEvent(event1Proto, new Configuration());
    assertTrue(is1 instanceof InputSplitForTest);
    assertEquals(1, ((InputSplitForTest) is1).identifier);
    MRSplitProto event2Proto = MRSplitProto.parseFrom(ByteString.copyFrom(diEvent2.getUserPayload()));
    InputSplit is2 = MRInputUtils.getOldSplitDetailsFromEvent(event2Proto, new Configuration());
    assertTrue(is2 instanceof InputSplitForTest);
    assertEquals(2, ((InputSplitForTest) is2).identifier);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) UserPayload(org.apache.tez.dag.api.UserPayload) ByteString(com.google.protobuf.ByteString) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) MRSplitsProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Example 3 with InputUpdatePayloadEvent

use of org.apache.tez.runtime.api.events.InputUpdatePayloadEvent in project tez by apache.

the class TestMRInputSplitDistributor method testDeserializedPayload.

@Test(timeout = 5000)
public void testDeserializedPayload() throws IOException {
    Configuration conf = new Configuration(false);
    conf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, false);
    ByteString confByteString = TezUtils.createByteStringFromConf(conf);
    InputSplit split1 = new InputSplitForTest(1);
    InputSplit split2 = new InputSplitForTest(2);
    MRSplitProto proto1 = MRInputHelpers.createSplitProto(split1);
    MRSplitProto proto2 = MRInputHelpers.createSplitProto(split2);
    MRSplitsProto.Builder splitsProtoBuilder = MRSplitsProto.newBuilder();
    splitsProtoBuilder.addSplits(proto1);
    splitsProtoBuilder.addSplits(proto2);
    MRInputUserPayloadProto.Builder payloadProto = MRInputUserPayloadProto.newBuilder();
    payloadProto.setSplits(splitsProtoBuilder.build());
    payloadProto.setConfigurationBytes(confByteString);
    UserPayload userPayload = UserPayload.create(payloadProto.build().toByteString().asReadOnlyByteBuffer());
    InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
    MRInputSplitDistributor splitDist = new MRInputSplitDistributor(context);
    List<Event> events = splitDist.initialize();
    assertEquals(3, events.size());
    assertTrue(events.get(0) instanceof InputUpdatePayloadEvent);
    assertTrue(events.get(1) instanceof InputDataInformationEvent);
    assertTrue(events.get(2) instanceof InputDataInformationEvent);
    InputDataInformationEvent diEvent1 = (InputDataInformationEvent) (events.get(1));
    InputDataInformationEvent diEvent2 = (InputDataInformationEvent) (events.get(2));
    assertNull(diEvent1.getUserPayload());
    assertNull(diEvent2.getUserPayload());
    assertNotNull(diEvent1.getDeserializedUserPayload());
    assertNotNull(diEvent2.getDeserializedUserPayload());
    assertTrue(diEvent1.getDeserializedUserPayload() instanceof InputSplitForTest);
    assertEquals(1, ((InputSplitForTest) diEvent1.getDeserializedUserPayload()).identifier);
    assertTrue(diEvent2.getDeserializedUserPayload() instanceof InputSplitForTest);
    assertEquals(2, ((InputSplitForTest) diEvent2.getDeserializedUserPayload()).identifier);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) UserPayload(org.apache.tez.dag.api.UserPayload) ByteString(com.google.protobuf.ByteString) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) MRSplitsProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Example 4 with InputUpdatePayloadEvent

use of org.apache.tez.runtime.api.events.InputUpdatePayloadEvent in project tez by apache.

the class MRInputSplitDistributor method initialize.

@Override
public List<Event> initialize() throws IOException {
    StopWatch sw = new StopWatch().start();
    MRInputUserPayloadProto userPayloadProto = MRInputHelpers.parseMRInputPayload(getContext().getInputUserPayload());
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time to parse MRInput payload into prot: " + sw.now(TimeUnit.MILLISECONDS));
    }
    Configuration conf = TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());
    JobConf jobConf = new JobConf(conf);
    boolean useNewApi = jobConf.getUseNewMapper();
    sendSerializedEvents = conf.getBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
    LOG.info("Emitting serialized splits: " + sendSerializedEvents);
    this.splitsProto = userPayloadProto.getSplits();
    MRInputUserPayloadProto.Builder updatedPayloadBuilder = MRInputUserPayloadProto.newBuilder(userPayloadProto);
    updatedPayloadBuilder.clearSplits();
    List<Event> events = Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
    InputUpdatePayloadEvent updatePayloadEvent = InputUpdatePayloadEvent.create(updatedPayloadBuilder.build().toByteString().asReadOnlyByteBuffer());
    events.add(updatePayloadEvent);
    int count = 0;
    for (MRSplitProto mrSplit : this.splitsProto.getSplitsList()) {
        InputDataInformationEvent diEvent;
        if (sendSerializedEvents) {
            // Unnecessary array copy, can be avoided by using ByteBuffer instead of
            // a raw array.
            diEvent = InputDataInformationEvent.createWithSerializedPayload(count++, mrSplit.toByteString().asReadOnlyByteBuffer());
        } else {
            if (useNewApi) {
                org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils.getNewSplitDetailsFromEvent(mrSplit, conf);
                diEvent = InputDataInformationEvent.createWithObjectPayload(count++, newInputSplit);
            } else {
                org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils.getOldSplitDetailsFromEvent(mrSplit, conf);
                diEvent = InputDataInformationEvent.createWithObjectPayload(count++, oldInputSplit);
            }
        }
        events.add(diEvent);
    }
    return events;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) StopWatch(org.apache.tez.util.StopWatch) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) JobConf(org.apache.hadoop.mapred.JobConf) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Aggregations

Event (org.apache.tez.runtime.api.Event)4 InputDataInformationEvent (org.apache.tez.runtime.api.events.InputDataInformationEvent)4 InputUpdatePayloadEvent (org.apache.tez.runtime.api.events.InputUpdatePayloadEvent)4 Configuration (org.apache.hadoop.conf.Configuration)3 MRInputUserPayloadProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto)3 MRSplitProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto)3 ByteString (com.google.protobuf.ByteString)2 InputSplit (org.apache.hadoop.mapred.InputSplit)2 UserPayload (org.apache.tez.dag.api.UserPayload)2 MRSplitsProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto)2 InputInitializerContext (org.apache.tez.runtime.api.InputInitializerContext)2 Test (org.junit.Test)2 HashMap (java.util.HashMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 JobConf (org.apache.hadoop.mapred.JobConf)1 InputSpecUpdate (org.apache.tez.runtime.api.InputSpecUpdate)1 InputConfigureVertexTasksEvent (org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent)1 InputInitializerEvent (org.apache.tez.runtime.api.events.InputInitializerEvent)1 VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent)1 StopWatch (org.apache.tez.util.StopWatch)1