use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.
the class MRInputSplitDistributor method initialize.
/**
 * Parses the MRInput user payload, separates the embedded splits from it, and
 * produces the events that describe them.
 *
 * <p>The returned list starts with a single {@link InputUpdatePayloadEvent}
 * carrying the payload with its splits cleared, followed by one
 * {@link InputDataInformationEvent} per split. Split events are numbered from 0
 * in the order the splits appear in the payload.
 *
 * @return the payload-update event followed by one data-information event per split
 * @throws IOException declared by the signature; which of the helper calls
 *         raise it is not visible from this method
 */
@Override
public List<Event> initialize() throws IOException {
  // Time only the payload parsing step; the figure is reported at debug level.
  StopWatch parseTimer = new StopWatch().start();
  MRInputUserPayloadProto payload =
      MRInputHelpers.parseMRInputPayload(getContext().getInputUserPayload());
  parseTimer.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time to parse MRInput payload into prot: " + parseTimer.now(TimeUnit.MILLISECONDS));
  }

  Configuration conf = TezUtils.createConfFromByteString(payload.getConfigurationBytes());
  boolean useNewApi = new JobConf(conf).getUseNewMapper();
  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  LOG.info("Emitting serialized splits: " + sendSerializedEvents);

  // Keep the splits on this instance and build a copy of the payload without them.
  this.splitsProto = payload.getSplits();
  MRInputUserPayloadProto.Builder payloadWithoutSplits = MRInputUserPayloadProto.newBuilder(payload);
  payloadWithoutSplits.clearSplits();

  // One slot for the payload-update event plus one per split.
  List<Event> events = Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
  events.add(InputUpdatePayloadEvent.create(
      payloadWithoutSplits.build().toByteString().asReadOnlyByteBuffer()));

  int splitIndex = 0;
  for (MRSplitProto split : this.splitsProto.getSplitsList()) {
    InputDataInformationEvent splitEvent;
    if (sendSerializedEvents) {
      // Ship the split in serialized protobuf form, exposed as a read-only ByteBuffer.
      splitEvent = InputDataInformationEvent.createWithSerializedPayload(
          splitIndex, split.toByteString().asReadOnlyByteBuffer());
    } else if (useNewApi) {
      // Ship the split as an object built through the new (mapreduce) API helper.
      org.apache.hadoop.mapreduce.InputSplit newApiSplit =
          MRInputUtils.getNewSplitDetailsFromEvent(split, conf);
      splitEvent = InputDataInformationEvent.createWithObjectPayload(splitIndex, newApiSplit);
    } else {
      // Ship the split as an object built through the old (mapred) API helper.
      org.apache.hadoop.mapred.InputSplit oldApiSplit =
          MRInputUtils.getOldSplitDetailsFromEvent(split, conf);
      splitEvent = InputDataInformationEvent.createWithObjectPayload(splitIndex, oldApiSplit);
    }
    events.add(splitEvent);
    splitIndex++;
  }
  return events;
}
use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.
the class MRInput method handleEvents.
/**
 * Accepts the one split event this input is set up to receive and passes it to
 * {@code processSplitEvent}.
 *
 * @param inputEvents the events delivered to this input; must contain exactly
 *        one element, and that element must be an {@link InputDataInformationEvent}
 * @throws IllegalStateException if this input has zero physical inputs, if the
 *         {@code eventReceived} flag is already set, or if the list does not
 *         contain exactly one event
 * @throws Exception declared by the signature; the type check is delegated to
 *         {@code Preconditions.checkArgument} and the remaining work to
 *         {@code processSplitEvent}
 */
@Override
public void handleEvents(List<Event> inputEvents) throws Exception {
  if (getNumPhysicalInputs() == 0) {
    throw new IllegalStateException("Unexpected event. MRInput has been setup to receive 0 events");
  }
  if (eventReceived || inputEvents.size() != 1) {
    // The two reported values are separated so the message stays readable.
    throw new IllegalStateException("MRInput expects only a single input. Received: current eventListSize: "
        + inputEvents.size() + ", Received previous input: " + eventReceived);
  }
  // The size check above guarantees exactly one element.
  Event event = inputEvents.get(0);
  Preconditions.checkArgument(event instanceof InputDataInformationEvent,
      getClass().getSimpleName() + " can only handle a single event of type: "
          + InputDataInformationEvent.class.getSimpleName());
  processSplitEvent((InputDataInformationEvent) event);
}
use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.
the class TezEvent method serializeEvent.
/**
 * Writes this event to {@code out}.
 *
 * <p>Write order: a boolean presence flag ({@code false} and nothing more when
 * {@code event} is null), the ordinal of {@code eventType} as an int,
 * {@code eventReceivedTime} as a long, and then the event body. A
 * {@code TASK_STATUS_UPDATE_EVENT} writes its own body through
 * {@code TaskStatusUpdateEvent.write}; every other handled type is converted to
 * a protobuf message and written as an int length followed by the message bytes.
 *
 * @param out the sink to write to
 * @throws IOException if a write to {@code out} fails
 * @throws TezUncheckedException if {@code eventType} is not one of the types
 *         handled here
 */
private void serializeEvent(DataOutput out) throws IOException {
  if (event == null) {
    out.writeBoolean(false);
    return;
  }

  // Common header shared by every non-null event.
  out.writeBoolean(true);
  out.writeInt(eventType.ordinal());
  out.writeLong(eventReceivedTime);

  if (eventType.equals(EventType.TASK_STATUS_UPDATE_EVENT)) {
    // TODO NEWTEZ convert to PB
    ((TaskStatusUpdateEvent) event).write(out);
    return;
  }

  // Every remaining event type is carried as a protobuf message.
  final AbstractMessage proto;
  switch (eventType) {
    case CUSTOM_PROCESSOR_EVENT:
      proto = ProtoConverters.convertCustomProcessorEventToProto((CustomProcessorEvent) event);
      break;
    case DATA_MOVEMENT_EVENT:
      proto = ProtoConverters.convertDataMovementEventToProto((DataMovementEvent) event);
      break;
    case COMPOSITE_ROUTED_DATA_MOVEMENT_EVENT:
      proto = ProtoConverters.convertCompositeRoutedDataMovementEventToProto(
          (CompositeRoutedDataMovementEvent) event);
      break;
    case COMPOSITE_DATA_MOVEMENT_EVENT:
      proto = ProtoConverters.convertCompositeDataMovementEventToProto(
          (CompositeDataMovementEvent) event);
      break;
    case VERTEX_MANAGER_EVENT:
      proto = ProtoConverters.convertVertexManagerEventToProto((VertexManagerEvent) event);
      break;
    case INPUT_READ_ERROR_EVENT: {
      InputReadErrorEvent readError = (InputReadErrorEvent) event;
      proto = InputReadErrorEventProto.newBuilder()
          .setIndex(readError.getIndex())
          .setDiagnostics(readError.getDiagnostics())
          .setVersion(readError.getVersion())
          .build();
      break;
    }
    case TASK_ATTEMPT_FAILED_EVENT: {
      TaskAttemptFailedEvent failed = (TaskAttemptFailedEvent) event;
      proto = TaskAttemptFailedEventProto.newBuilder()
          .setDiagnostics(failed.getDiagnostics())
          .setTaskFailureType(TezConverterUtils.failureTypeToProto(failed.getTaskFailureType()))
          .build();
      break;
    }
    case TASK_ATTEMPT_KILLED_EVENT: {
      TaskAttemptKilledEvent killed = (TaskAttemptKilledEvent) event;
      proto = TaskAttemptKilledEventProto.newBuilder()
          .setDiagnostics(killed.getDiagnostics())
          .build();
      break;
    }
    case TASK_ATTEMPT_COMPLETED_EVENT:
      // This event is written as a message with no fields set.
      proto = TaskAttemptCompletedEventProto.newBuilder().build();
      break;
    case INPUT_FAILED_EVENT: {
      InputFailedEvent inputFailed = (InputFailedEvent) event;
      proto = InputFailedEventProto.newBuilder()
          .setTargetIndex(inputFailed.getTargetIndex())
          .setVersion(inputFailed.getVersion())
          .build();
      break;
    }
    case ROOT_INPUT_DATA_INFORMATION_EVENT:
      proto = ProtoConverters.convertRootInputDataInformationEventToProto(
          (InputDataInformationEvent) event);
      break;
    case ROOT_INPUT_INITIALIZER_EVENT:
      proto = ProtoConverters.convertRootInputInitializerEventToProto((InputInitializerEvent) event);
      break;
    default:
      throw new TezUncheckedException("Unknown TezEvent" + ", type=" + eventType);
  }

  if (!(out instanceof OutputStream)) {
    // Plain DataOutput: materialize the message, then write length and bytes.
    byte[] protoBytes = proto.toByteArray();
    out.writeInt(protoBytes.length);
    out.write(protoBytes);
    return;
  }

  // DataOutputBuffer extends DataOutputStream
  // Write the length, then stream the message through a CodedOutputStream whose
  // buffer is the smaller of the message size and DEFAULT_BUFFER_SIZE.
  int protoSize = proto.getSerializedSize();
  out.writeInt(protoSize);
  int bufferSize = Math.min(protoSize, CodedOutputStream.DEFAULT_BUFFER_SIZE);
  CodedOutputStream coded = CodedOutputStream.newInstance((OutputStream) out, bufferSize);
  proto.writeTo(coded);
  coded.flush();
}
use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.
the class TestMRInputAMSplitGenerator method testGroupSplitsAndSortSplits.
/**
 * Runs {@code MRInputAMSplitGenerator} over a configuration that publishes 50
 * length values and checks the events it emits for the given flag combination.
 *
 * <p>Checks performed: the first event is an
 * {@code InputConfigureVertexTasksEvent}; every later event is an
 * {@code InputDataInformationEvent} with a serialized payload and no
 * deserialized payload; every decoded split (or every member of a grouped
 * split) is an {@code InputSplitForTest}; the number of raw splits equals the
 * number of configured lengths; and the length ordering matches
 * {@code sortSplitsEnabled}.
 *
 * @param groupSplitsEnabled whether split grouping is turned on in the builder
 * @param sortSplitsEnabled whether split sorting is turned on in the builder
 * @throws Exception if the generator or any helper fails
 */
private void testGroupSplitsAndSortSplits(boolean groupSplitsEnabled, boolean sortSplitsEnabled) throws Exception {
  // Fifty length values, 1000 through 50000 in steps of 1000, published under SPLITS_LENGTHS.
  String[] splitLengths = new String[50];
  for (int idx = 0; idx < splitLengths.length; idx++) {
    splitLengths[idx] = Integer.toString(1000 * (idx + 1));
  }
  Configuration conf = new Configuration();
  conf.setStrings(SPLITS_LENGTHS, splitLengths);

  DataSourceDescriptor dataSource = MRInput.createConfigBuilder(conf, InputFormatForTest.class)
      .groupSplits(groupSplitsEnabled)
      .sortSplits(sortSplitsEnabled)
      .build();
  UserPayload userPayload = dataSource.getInputDescriptor().getUserPayload();
  InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
  List<Event> events = new MRInputAMSplitGenerator(context).initialize();

  assertTrue(events.get(0) instanceof InputConfigureVertexTasksEvent);

  boolean sawLengthIncrease = false;
  InputSplit previousSplit = null;
  int rawSplitCount = 0;
  // Everything after the first event is expected to describe one split.
  for (Event event : events.subList(1, events.size())) {
    assertTrue(event instanceof InputDataInformationEvent);
    InputDataInformationEvent diEvent = (InputDataInformationEvent) event;
    assertNull(diEvent.getDeserializedUserPayload());
    assertNotNull(diEvent.getUserPayload());

    MRSplitProto splitProto = MRSplitProto.parseFrom(ByteString.copyFrom(diEvent.getUserPayload()));
    InputSplit currentSplit = MRInputUtils.getNewSplitDetailsFromEvent(splitProto, new Configuration());

    if (groupSplitsEnabled) {
      TezGroupedSplit grouped = (TezGroupedSplit) currentSplit;
      rawSplitCount += grouped.getGroupedSplits().size();
      for (InputSplit member : grouped.getGroupedSplits()) {
        assertTrue(member instanceof InputSplitForTest);
      }
      assertTrue(grouped.getGroupedSplits().get(0) instanceof InputSplitForTest);
    } else {
      rawSplitCount++;
      assertTrue(currentSplit instanceof InputSplitForTest);
    }

    // With sorting enabled each split must be no longer than the one before it;
    // otherwise remember whether any split was longer than its predecessor.
    if (previousSplit != null) {
      if (sortSplitsEnabled) {
        assertTrue(currentSplit.getLength() <= previousSplit.getLength());
      } else if (currentSplit.getLength() > previousSplit.getLength()) {
        sawLengthIncrease = true;
      }
    }
    previousSplit = currentSplit;
  }

  assertEquals(splitLengths.length, rawSplitCount);
  if (!sortSplitsEnabled) {
    // Without sorting, at least one split must be longer than the split before it.
    assertTrue(sawLengthIncrease);
  }
}
use of org.apache.tez.runtime.api.events.InputDataInformationEvent in project tez by apache.
the class TestMRInput method testAttributesInJobConf.
/**
 * Initializes an {@code MRInput} against a mocked {@code InputContext} whose
 * identifying attributes are stubbed with the {@code TEST_ATTRIBUTES_*}
 * constants, delivers a single split event, and then checks that the
 * {@code INPUT_SPLIT_LENGTH_BYTES} counter equals {@code TestInputSplit.length}
 * and that {@code TestInputFormat.invoked} is set.
 *
 * @throws Exception if initialization or event handling fails
 */
@Test(timeout = 5000)
public void testAttributesInJobConf() throws Exception {
  // Stub every context attribute with its test constant.
  InputContext inputContext = mock(InputContext.class);
  doReturn(TEST_ATTRIBUTES_DAG_INDEX).when(inputContext).getDagIdentifier();
  doReturn(TEST_ATTRIBUTES_VERTEX_INDEX).when(inputContext).getTaskVertexIndex();
  doReturn(TEST_ATTRIBUTES_TASK_INDEX).when(inputContext).getTaskIndex();
  doReturn(TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX).when(inputContext).getTaskAttemptNumber();
  doReturn(TEST_ATTRIBUTES_INPUT_INDEX).when(inputContext).getInputIndex();
  doReturn(TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER).when(inputContext).getDAGAttemptNumber();
  doReturn(TEST_ATTRIBUTES_DAG_NAME).when(inputContext).getDAGName();
  doReturn(TEST_ATTRIBUTES_VERTEX_NAME).when(inputContext).getTaskVertexName();
  doReturn(TEST_ATTRIBUTES_INPUT_NAME).when(inputContext).getSourceVertexName();
  doReturn(TEST_ATTRIBUTES_APPLICATION_ID).when(inputContext).getApplicationId();
  doReturn(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER).when(inputContext).getUniqueIdentifier();

  DataSourceDescriptor dsd = MRInput.createConfigBuilder(new Configuration(false), TestInputFormat.class)
      .groupSplits(false)
      .build();
  doReturn(dsd.getInputDescriptor().getUserPayload()).when(inputContext).getUserPayload();
  doReturn(new TezCounters()).when(inputContext).getCounters();

  MRInput mrInput = new MRInput(inputContext, 1);
  mrInput.initialize();

  // Deliver one serialized split event naming TestInputSplit as the split class.
  MRRuntimeProtos.MRSplitProto splitProto = MRRuntimeProtos.MRSplitProto.newBuilder()
      .setSplitClassName(TestInputSplit.class.getName())
      .build();
  InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(
      0, splitProto.toByteString().asReadOnlyByteBuffer());
  List<Event> events = new LinkedList<>();
  events.add(diEvent);
  mrInput.handleEvents(events);

  TezCounter counter = mrInput.getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES);
  // Expected value goes first so a failure message labels the two values correctly.
  assertEquals(TestInputSplit.length, counter.getValue());
  assertTrue(TestInputFormat.invoked.get());
}
Aggregations