Search in sources:

Example 16 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class MRInputSplitDistributor method initialize.

/**
 * Builds the initial event list from the splits embedded in the MRInput user payload.
 *
 * <p>The first event is an {@link InputUpdatePayloadEvent} carrying the user payload with its
 * splits cleared. It is followed by one {@link InputDataInformationEvent} per split, indexed
 * from 0 in the order the splits appear in the payload. Depending on
 * {@code sendSerializedEvents}, each split is sent either as serialized bytes or as a
 * deserialized split object (new or old MapReduce API, as selected by the job configuration).
 *
 * @return the payload-update event followed by one data-information event per split
 * @throws IOException declared by the method; may be raised by the helpers invoked here
 */
@Override
public List<Event> initialize() throws IOException {
    StopWatch parseTimer = new StopWatch().start();
    MRInputUserPayloadProto payload = MRInputHelpers.parseMRInputPayload(getContext().getInputUserPayload());
    parseTimer.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time to parse MRInput payload into prot: " + parseTimer.now(TimeUnit.MILLISECONDS));
    }

    Configuration conf = TezUtils.createConfFromByteString(payload.getConfigurationBytes());
    boolean useNewApi = new JobConf(conf).getUseNewMapper();
    sendSerializedEvents = conf.getBoolean(
        MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
        MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
    LOG.info("Emitting serialized splits: " + sendSerializedEvents);

    this.splitsProto = payload.getSplits();

    // The splits travel as individual events, so strip them from the payload that is re-sent.
    MRInputUserPayloadProto payloadWithoutSplits =
        MRInputUserPayloadProto.newBuilder(payload).clearSplits().build();

    // One slot per split plus one for the leading payload-update event.
    List<Event> events = Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
    events.add(InputUpdatePayloadEvent.create(payloadWithoutSplits.toByteString().asReadOnlyByteBuffer()));

    int splitIndex = 0;
    for (MRSplitProto split : this.splitsProto.getSplitsList()) {
        final InputDataInformationEvent splitEvent;
        if (sendSerializedEvents) {
            // Ship the split in its protobuf-serialized form, exposed as a read-only ByteBuffer.
            splitEvent = InputDataInformationEvent.createWithSerializedPayload(
                splitIndex, split.toByteString().asReadOnlyByteBuffer());
        } else if (useNewApi) {
            org.apache.hadoop.mapreduce.InputSplit newApiSplit =
                MRInputUtils.getNewSplitDetailsFromEvent(split, conf);
            splitEvent = InputDataInformationEvent.createWithObjectPayload(splitIndex, newApiSplit);
        } else {
            org.apache.hadoop.mapred.InputSplit oldApiSplit =
                MRInputUtils.getOldSplitDetailsFromEvent(split, conf);
            splitEvent = InputDataInformationEvent.createWithObjectPayload(splitIndex, oldApiSplit);
        }
        events.add(splitEvent);
        splitIndex++;
    }
    return events;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) StopWatch(org.apache.tez.util.StopWatch) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) JobConf(org.apache.hadoop.mapred.JobConf) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 17 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class MRInput method handleEvents.

/**
 * Accepts the one split-carrying event this input is set up to receive and hands it to
 * {@code processSplitEvent}.
 *
 * @param inputEvents the events delivered to this input; must contain exactly one
 *     {@link InputDataInformationEvent}
 * @throws IllegalStateException if this input was set up with zero physical inputs, if
 *     {@code eventReceived} is already set, or if the list does not hold exactly one event
 * @throws Exception if the event fails the {@code Preconditions.checkArgument} type check, or
 *     if processing the split event fails
 */
@Override
public void handleEvents(List<Event> inputEvents) throws Exception {
    if (getNumPhysicalInputs() == 0) {
        throw new IllegalStateException("Unexpected event. MRInput has been setup to receive 0 events");
    }
    if (eventReceived || inputEvents.size() != 1) {
        // Keep the two diagnostics separated; without the delimiter the size and the next label
        // run together (e.g. "eventListSize: 2Received previous input: false").
        throw new IllegalStateException("MRInput expects only a single input. Received: current eventListSize: "
            + inputEvents.size() + ", Received previous input: " + eventReceived);
    }
    Event event = inputEvents.iterator().next();
    Preconditions.checkArgument(event instanceof InputDataInformationEvent,
        getClass().getSimpleName() + " can only handle a single event of type: "
            + InputDataInformationEvent.class.getSimpleName());
    processSplitEvent((InputDataInformationEvent) event);
}
Also used : Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 18 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TezEvent method serializeEvent.

/**
 * Writes the wrapped event to {@code out}.
 *
 * <p>Layout: a boolean presence flag; if an event is present, the event type ordinal, the
 * event received time, and then the event body. {@code TASK_STATUS_UPDATE_EVENT} writes itself
 * directly; every other supported type is converted to a protobuf message and written as an
 * int length followed by the message bytes.
 *
 * @param out the destination to serialize into
 * @throws IOException if writing to {@code out} fails
 * @throws TezUncheckedException if the event type has no serialization defined here
 */
private void serializeEvent(DataOutput out) throws IOException {
    final boolean hasEvent = event != null;
    out.writeBoolean(hasEvent);
    if (!hasEvent) {
        return;
    }

    out.writeInt(eventType.ordinal());
    out.writeLong(eventReceivedTime);

    if (eventType == EventType.TASK_STATUS_UPDATE_EVENT) {
        // TODO NEWTEZ convert to PB
        ((TaskStatusUpdateEvent) event).write(out);
        return;
    }

    final AbstractMessage proto;
    switch (eventType) {
        case CUSTOM_PROCESSOR_EVENT:
            proto = ProtoConverters.convertCustomProcessorEventToProto((CustomProcessorEvent) event);
            break;
        case DATA_MOVEMENT_EVENT:
            proto = ProtoConverters.convertDataMovementEventToProto((DataMovementEvent) event);
            break;
        case COMPOSITE_ROUTED_DATA_MOVEMENT_EVENT:
            proto = ProtoConverters.convertCompositeRoutedDataMovementEventToProto(
                (CompositeRoutedDataMovementEvent) event);
            break;
        case COMPOSITE_DATA_MOVEMENT_EVENT:
            proto = ProtoConverters.convertCompositeDataMovementEventToProto((CompositeDataMovementEvent) event);
            break;
        case VERTEX_MANAGER_EVENT:
            proto = ProtoConverters.convertVertexManagerEventToProto((VertexManagerEvent) event);
            break;
        case INPUT_READ_ERROR_EVENT: {
            InputReadErrorEvent readError = (InputReadErrorEvent) event;
            proto = InputReadErrorEventProto.newBuilder()
                .setIndex(readError.getIndex())
                .setDiagnostics(readError.getDiagnostics())
                .setVersion(readError.getVersion())
                .build();
            break;
        }
        case TASK_ATTEMPT_FAILED_EVENT: {
            TaskAttemptFailedEvent failed = (TaskAttemptFailedEvent) event;
            proto = TaskAttemptFailedEventProto.newBuilder()
                .setDiagnostics(failed.getDiagnostics())
                .setTaskFailureType(TezConverterUtils.failureTypeToProto(failed.getTaskFailureType()))
                .build();
            break;
        }
        case TASK_ATTEMPT_KILLED_EVENT: {
            TaskAttemptKilledEvent killed = (TaskAttemptKilledEvent) event;
            proto = TaskAttemptKilledEventProto.newBuilder()
                .setDiagnostics(killed.getDiagnostics())
                .build();
            break;
        }
        case TASK_ATTEMPT_COMPLETED_EVENT:
            proto = TaskAttemptCompletedEventProto.newBuilder().build();
            break;
        case INPUT_FAILED_EVENT: {
            InputFailedEvent inputFailed = (InputFailedEvent) event;
            proto = InputFailedEventProto.newBuilder()
                .setTargetIndex(inputFailed.getTargetIndex())
                .setVersion(inputFailed.getVersion())
                .build();
            break;
        }
        case ROOT_INPUT_DATA_INFORMATION_EVENT:
            proto = ProtoConverters.convertRootInputDataInformationEventToProto((InputDataInformationEvent) event);
            break;
        case ROOT_INPUT_INITIALIZER_EVENT:
            proto = ProtoConverters.convertRootInputInitializerEventToProto((InputInitializerEvent) event);
            break;
        default:
            throw new TezUncheckedException("Unknown TezEvent" + ", type=" + eventType);
    }

    if (out instanceof OutputStream) {
        // DataOutputBuffer extends DataOutputStream
        // Stream the message straight into the underlying OutputStream; the coded stream's
        // buffer is capped at the message size so small messages do not allocate a full buffer.
        int messageSize = proto.getSerializedSize();
        out.writeInt(messageSize);
        int bufferSize = Math.min(messageSize, CodedOutputStream.DEFAULT_BUFFER_SIZE);
        CodedOutputStream coded = CodedOutputStream.newInstance((OutputStream) out, bufferSize);
        proto.writeTo(coded);
        coded.flush();
        return;
    }

    // Generic DataOutput: materialize the message as a byte array first.
    byte[] payload = proto.toByteArray();
    out.writeInt(payload.length);
    out.write(payload);
}
Also used : InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) AbstractMessage(com.google.protobuf.AbstractMessage) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) CodedOutputStream(com.google.protobuf.CodedOutputStream) OutputStream(java.io.OutputStream) CodedOutputStream(com.google.protobuf.CodedOutputStream) InputReadErrorEvent(org.apache.tez.runtime.api.events.InputReadErrorEvent) TaskStatusUpdateEvent(org.apache.tez.runtime.api.events.TaskStatusUpdateEvent) CompositeRoutedDataMovementEvent(org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) CompositeRoutedDataMovementEvent(org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent) TaskAttemptFailedEvent(org.apache.tez.runtime.api.events.TaskAttemptFailedEvent) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) CustomProcessorEvent(org.apache.tez.runtime.api.events.CustomProcessorEvent) TaskAttemptKilledEvent(org.apache.tez.runtime.api.events.TaskAttemptKilledEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 19 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestMRInputAMSplitGenerator method testGroupSplitsAndSortSplits.

/**
 * Runs {@code MRInputAMSplitGenerator} over 50 test splits of lengths 1000, 2000, ... 50000
 * and verifies the generated events for the given grouping / sorting configuration.
 *
 * @param groupSplitsEnabled whether split grouping is enabled on the MRInput config builder
 * @param sortSplitsEnabled whether split sorting is enabled on the MRInput config builder
 * @throws Exception if split generation or split decoding fails
 */
private void testGroupSplitsAndSortSplits(boolean groupSplitsEnabled, boolean sortSplitsEnabled) throws Exception {
    String[] splitLengths = new String[50];
    for (int idx = 0; idx < splitLengths.length; idx++) {
        splitLengths[idx] = Integer.toString(1000 * (idx + 1));
    }
    Configuration conf = new Configuration();
    conf.setStrings(SPLITS_LENGTHS, splitLengths);

    DataSourceDescriptor dataSource = MRInput.createConfigBuilder(conf, InputFormatForTest.class)
        .groupSplits(groupSplitsEnabled)
        .sortSplits(sortSplitsEnabled)
        .build();
    UserPayload userPayload = dataSource.getInputDescriptor().getUserPayload();
    InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
    List<Event> events = new MRInputAMSplitGenerator(context).initialize();

    // The first event configures the vertex tasks; every later event carries one split.
    assertTrue(events.get(0) instanceof InputConfigureVertexTasksEvent);

    boolean sawLengthIncrease = false;
    int rawSplitCount = 0;
    InputSplit previousSplit = null;
    for (int pos = 1; pos < events.size(); pos++) {
        Event current = events.get(pos);
        assertTrue(current instanceof InputDataInformationEvent);
        InputDataInformationEvent splitEvent = (InputDataInformationEvent) current;

        // Splits are expected in serialized form only.
        assertNull(splitEvent.getDeserializedUserPayload());
        assertNotNull(splitEvent.getUserPayload());
        MRSplitProto splitProto = MRSplitProto.parseFrom(ByteString.copyFrom(splitEvent.getUserPayload()));
        InputSplit split = MRInputUtils.getNewSplitDetailsFromEvent(splitProto, new Configuration());

        if (groupSplitsEnabled) {
            List<InputSplit> grouped = ((TezGroupedSplit) split).getGroupedSplits();
            rawSplitCount += grouped.size();
            for (InputSplit member : grouped) {
                assertTrue(member instanceof InputSplitForTest);
            }
            assertTrue(grouped.get(0) instanceof InputSplitForTest);
        } else {
            rawSplitCount++;
            assertTrue(split instanceof InputSplitForTest);
        }

        // With sorting enabled each split must be no longer than its predecessor; without it,
        // remember whether any split was longer than the one before it.
        if (previousSplit != null) {
            if (sortSplitsEnabled) {
                assertTrue(split.getLength() <= previousSplit.getLength());
            } else if (split.getLength() > previousSplit.getLength()) {
                sawLengthIncrease = true;
            }
        }
        previousSplit = split;
    }

    assertEquals(splitLengths.length, rawSplitCount);
    if (!sortSplitsEnabled) {
        assertTrue(sawLengthIncrease);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) UserPayload(org.apache.tez.dag.api.UserPayload) TezGroupedSplit(org.apache.hadoop.mapreduce.split.TezGroupedSplit) ByteString(com.google.protobuf.ByteString) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputSplit(org.apache.hadoop.mapreduce.InputSplit) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto)

Example 20 with InputDataInformationEvent

use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.

the class TestMRInput method testAttributesInJobConf.

/**
 * Feeds a single serialized split event into an {@link MRInput} backed by a mocked
 * {@link InputContext}, then checks that the {@code INPUT_SPLIT_LENGTH_BYTES} counter equals
 * {@code TestInputSplit.length} and that {@code TestInputFormat.invoked} has been set.
 *
 * @throws Exception if initializing the input or handling the event fails
 */
@Test(timeout = 5000)
public void testAttributesInJobConf() throws Exception {
    InputContext inputContext = mock(InputContext.class);
    doReturn(TEST_ATTRIBUTES_DAG_INDEX).when(inputContext).getDagIdentifier();
    doReturn(TEST_ATTRIBUTES_VERTEX_INDEX).when(inputContext).getTaskVertexIndex();
    doReturn(TEST_ATTRIBUTES_TASK_INDEX).when(inputContext).getTaskIndex();
    doReturn(TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX).when(inputContext).getTaskAttemptNumber();
    doReturn(TEST_ATTRIBUTES_INPUT_INDEX).when(inputContext).getInputIndex();
    doReturn(TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER).when(inputContext).getDAGAttemptNumber();
    doReturn(TEST_ATTRIBUTES_DAG_NAME).when(inputContext).getDAGName();
    doReturn(TEST_ATTRIBUTES_VERTEX_NAME).when(inputContext).getTaskVertexName();
    doReturn(TEST_ATTRIBUTES_INPUT_NAME).when(inputContext).getSourceVertexName();
    doReturn(TEST_ATTRIBUTES_APPLICATION_ID).when(inputContext).getApplicationId();
    doReturn(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER).when(inputContext).getUniqueIdentifier();
    DataSourceDescriptor dsd = MRInput.createConfigBuilder(new Configuration(false), TestInputFormat.class).groupSplits(false).build();
    doReturn(dsd.getInputDescriptor().getUserPayload()).when(inputContext).getUserPayload();
    doReturn(new TezCounters()).when(inputContext).getCounters();
    MRInput mrInput = new MRInput(inputContext, 1);
    mrInput.initialize();
    // Only the split class name is set on the proto; the event carries it in serialized form.
    MRRuntimeProtos.MRSplitProto splitProto = MRRuntimeProtos.MRSplitProto.newBuilder().setSplitClassName(TestInputSplit.class.getName()).build();
    InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(0, splitProto.toByteString().asReadOnlyByteBuffer());
    List<Event> events = new LinkedList<>();
    events.add(diEvent);
    mrInput.handleEvents(events);
    TezCounter counter = mrInput.getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES);
    // Expected value first, actual second, so a failure reports the two values the right way round.
    assertEquals(TestInputSplit.length, counter.getValue());
    assertTrue(TestInputFormat.invoked.get());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputContext(org.apache.tez.runtime.api.InputContext) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) TezCounter(org.apache.tez.common.counters.TezCounter) MRRuntimeProtos(org.apache.tez.mapreduce.protos.MRRuntimeProtos) TezCounters(org.apache.tez.common.counters.TezCounters) LinkedList(java.util.LinkedList) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) Test(org.junit.Test)

Aggregations

InputDataInformationEvent (org.apache.tez.runtime.api.events.InputDataInformationEvent): 22; Event (org.apache.tez.runtime.api.Event): 16; MRSplitProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto): 10; Test (org.junit.Test): 10; Configuration (org.apache.hadoop.conf.Configuration): 8; InputSplit (org.apache.hadoop.mapred.InputSplit): 7; JobConf (org.apache.hadoop.mapred.JobConf): 5; InputContext (org.apache.tez.runtime.api.InputContext): 5; InputConfigureVertexTasksEvent (org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent): 5; LinkedList (java.util.LinkedList): 4; AtomicLong (java.util.concurrent.atomic.AtomicLong): 4; Path (org.apache.hadoop.fs.Path): 4; LongWritable (org.apache.hadoop.io.LongWritable): 4; Text (org.apache.hadoop.io.Text): 4; SequenceFileInputFormat (org.apache.hadoop.mapred.SequenceFileInputFormat): 4; UserPayload (org.apache.tez.dag.api.UserPayload): 4; MRInputUserPayloadProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto): 4; MRSplitsProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto): 4; ByteString (com.google.protobuf.ByteString): 3; ArrayList (java.util.ArrayList): 3