Use of org.apache.tez.runtime.api.events.InputUpdatePayloadEvent in the Apache Tez project.
Class RootInputVertexManager, method onRootVertexInitialized.
/**
 * Processes the events received for a root input and applies them to the vertex.
 *
 * <ul>
 *   <li>{@code InputConfigureVertexTasksEvent}: records {@code inputName} as the configured
 *       input and reconfigures the vertex with the event's input spec update (falling back to
 *       the default single-physical-input spec when the event carries none), location hint and
 *       task count.</li>
 *   <li>{@code InputUpdatePayloadEvent}: replaces the user payload on {@code inputDescriptor}.</li>
 *   <li>{@code InputDataInformationEvent}: routes the event 1:1 (target index = source index)
 *       and collects it.</li>
 * </ul>
 * All collected data information events are finally handed to the context via
 * {@code addRootInputEvents}.
 *
 * @param inputName name of the root input the events belong to
 * @param inputDescriptor descriptor of that input; its user payload may be replaced
 * @param events events to process, in order
 * @throws IllegalStateException if one of the precondition checks below fails, e.g. a
 *         configure/update-payload event arrives after a data information event, or a second
 *         input tries to configure the vertex
 */
@Override
public void onRootVertexInitialized(String inputName, InputDescriptor inputDescriptor, List<Event> events) {
  // Only data information events are collected, so events.size() is an upper bound.
  List<InputDataInformationEvent> riEvents = Lists.newArrayListWithCapacity(events.size());
  boolean dataInformationEventSeen = false;
  for (Event event : events) {
    // The three event kinds are handled as one mutually exclusive chain.
    if (event instanceof InputConfigureVertexTasksEvent) {
      // No tasks should have been started yet. Checked by initial state check.
      Preconditions.checkState(!dataInformationEventSeen,
          "InputConfigureVertexTasksEvent received after an InputDataInformationEvent"
              + ", InputName: " + inputName);
      Preconditions.checkState(getContext().getVertexNumTasks(getContext().getVertexName()) == -1,
          "Parallelism for the vertex should be set to -1 if the InputInitializer is setting parallelism"
              + ", VertexName: " + getContext().getVertexName());
      Preconditions.checkState(configuredInputName == null,
          "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager"
              + ", VertexName: " + getContext().getVertexName()
              + ", ConfiguredInput: " + configuredInputName
              + ", CurrentInput: " + inputName);
      configuredInputName = inputName;
      InputConfigureVertexTasksEvent cEvent = (InputConfigureVertexTasksEvent) event;
      // Fall back to the default spec when the initializer did not provide one.
      InputSpecUpdate specUpdate = cEvent.getInputSpecUpdate();
      if (specUpdate == null) {
        specUpdate = InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate();
      }
      Map<String, InputSpecUpdate> rootInputSpecUpdate = new HashMap<String, InputSpecUpdate>();
      rootInputSpecUpdate.put(inputName, specUpdate);
      getContext().reconfigureVertex(rootInputSpecUpdate, cEvent.getLocationHint(), cEvent.getNumTasks());
    } else if (event instanceof InputUpdatePayloadEvent) {
      // No tasks should have been started yet. Checked by initial state check.
      Preconditions.checkState(!dataInformationEventSeen,
          "InputUpdatePayloadEvent received after an InputDataInformationEvent"
              + ", InputName: " + inputName);
      inputDescriptor.setUserPayload(UserPayload.create(((InputUpdatePayloadEvent) event).getUserPayload()));
    } else if (event instanceof InputDataInformationEvent) {
      dataInformationEventSeen = true;
      // # Tasks should have been set by this point.
      // Constant message: this check runs once per data information event.
      Preconditions.checkState(getContext().getVertexNumTasks(getContext().getVertexName()) != 0,
          "Number of tasks for the vertex must not be 0 when an InputDataInformationEvent is received");
      Preconditions.checkState(configuredInputName == null || configuredInputName.equals(inputName),
          "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager"
              + ", VertexName:" + getContext().getVertexName()
              + ", ConfiguredInput: " + configuredInputName
              + ", CurrentInput: " + inputName);
      configuredInputName = inputName;
      InputDataInformationEvent rEvent = (InputDataInformationEvent) event;
      // 1:1 routing
      rEvent.setTargetIndex(rEvent.getSourceIndex());
      riEvents.add(rEvent);
    }
  }
  getContext().addRootInputEvents(inputName, riEvents);
}
Use of org.apache.tez.runtime.api.events.InputUpdatePayloadEvent in the Apache Tez project.
Class TestMRInputSplitDistributor, method testSerializedPayload.
/**
 * With MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD set to true, the distributor must emit
 * one InputUpdatePayloadEvent followed by one InputDataInformationEvent per split. Each data
 * event must carry only a serialized payload, which parses back into the original split.
 */
@Test(timeout = 5000)
public void testSerializedPayload() throws IOException {
  // Configuration asking the distributor for serialized split payloads.
  Configuration conf = new Configuration(false);
  conf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, true);
  ByteString serializedConf = TezUtils.createByteStringFromConf(conf);

  // Two test splits, identified by 1 and 2, packed into the MRInput user payload.
  InputSplit firstSplit = new InputSplitForTest(1);
  InputSplit secondSplit = new InputSplitForTest(2);
  MRSplitsProto splitsProto = MRSplitsProto.newBuilder()
      .addSplits(MRInputHelpers.createSplitProto(firstSplit))
      .addSplits(MRInputHelpers.createSplitProto(secondSplit))
      .build();
  MRInputUserPayloadProto payloadProto = MRInputUserPayloadProto.newBuilder()
      .setSplits(splitsProto)
      .setConfigurationBytes(serializedConf)
      .build();
  UserPayload userPayload = UserPayload.create(payloadProto.toByteString().asReadOnlyByteBuffer());

  InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
  List<Event> events = new MRInputSplitDistributor(context).initialize();

  // One payload update event, then one data information event per split.
  assertEquals(3, events.size());
  assertTrue(events.get(0) instanceof InputUpdatePayloadEvent);
  assertTrue(events.get(1) instanceof InputDataInformationEvent);
  assertTrue(events.get(2) instanceof InputDataInformationEvent);
  InputDataInformationEvent firstEvent = (InputDataInformationEvent) events.get(1);
  InputDataInformationEvent secondEvent = (InputDataInformationEvent) events.get(2);

  // Only the serialized form of the payload may be populated.
  assertNull(firstEvent.getDeserializedUserPayload());
  assertNull(secondEvent.getDeserializedUserPayload());
  assertNotNull(firstEvent.getUserPayload());
  assertNotNull(secondEvent.getUserPayload());

  // The serialized payloads must round-trip to the original splits.
  MRSplitProto firstParsed = MRSplitProto.parseFrom(ByteString.copyFrom(firstEvent.getUserPayload()));
  InputSplit firstRestored = MRInputUtils.getOldSplitDetailsFromEvent(firstParsed, new Configuration());
  assertTrue(firstRestored instanceof InputSplitForTest);
  assertEquals(1, ((InputSplitForTest) firstRestored).identifier);

  MRSplitProto secondParsed = MRSplitProto.parseFrom(ByteString.copyFrom(secondEvent.getUserPayload()));
  InputSplit secondRestored = MRInputUtils.getOldSplitDetailsFromEvent(secondParsed, new Configuration());
  assertTrue(secondRestored instanceof InputSplitForTest);
  assertEquals(2, ((InputSplitForTest) secondRestored).identifier);
}
Use of org.apache.tez.runtime.api.events.InputUpdatePayloadEvent in the Apache Tez project.
Class TestMRInputSplitDistributor, method testDeserializedPayload.
/**
 * With MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD set to false, the distributor must emit
 * one InputUpdatePayloadEvent followed by one InputDataInformationEvent per split. Each data
 * event must carry only an object payload, which is the reconstructed InputSplitForTest.
 */
@Test(timeout = 5000)
public void testDeserializedPayload() throws IOException {
  // Configuration asking the distributor for object (non-serialized) split payloads.
  Configuration conf = new Configuration(false);
  conf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, false);
  ByteString serializedConf = TezUtils.createByteStringFromConf(conf);

  // Two test splits, identified by 1 and 2, packed into the MRInput user payload.
  InputSplit firstSplit = new InputSplitForTest(1);
  InputSplit secondSplit = new InputSplitForTest(2);
  MRSplitsProto splitsProto = MRSplitsProto.newBuilder()
      .addSplits(MRInputHelpers.createSplitProto(firstSplit))
      .addSplits(MRInputHelpers.createSplitProto(secondSplit))
      .build();
  MRInputUserPayloadProto payloadProto = MRInputUserPayloadProto.newBuilder()
      .setSplits(splitsProto)
      .setConfigurationBytes(serializedConf)
      .build();
  UserPayload userPayload = UserPayload.create(payloadProto.toByteString().asReadOnlyByteBuffer());

  InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload);
  List<Event> events = new MRInputSplitDistributor(context).initialize();

  // One payload update event, then one data information event per split.
  assertEquals(3, events.size());
  assertTrue(events.get(0) instanceof InputUpdatePayloadEvent);
  assertTrue(events.get(1) instanceof InputDataInformationEvent);
  assertTrue(events.get(2) instanceof InputDataInformationEvent);
  InputDataInformationEvent firstEvent = (InputDataInformationEvent) events.get(1);
  InputDataInformationEvent secondEvent = (InputDataInformationEvent) events.get(2);

  // Only the object form of the payload may be populated.
  assertNull(firstEvent.getUserPayload());
  assertNull(secondEvent.getUserPayload());
  assertNotNull(firstEvent.getDeserializedUserPayload());
  assertNotNull(secondEvent.getDeserializedUserPayload());

  // The object payloads must be the test splits, in their original order.
  assertTrue(firstEvent.getDeserializedUserPayload() instanceof InputSplitForTest);
  InputSplitForTest firstRestored = (InputSplitForTest) firstEvent.getDeserializedUserPayload();
  assertEquals(1, firstRestored.identifier);
  assertTrue(secondEvent.getDeserializedUserPayload() instanceof InputSplitForTest);
  InputSplitForTest secondRestored = (InputSplitForTest) secondEvent.getDeserializedUserPayload();
  assertEquals(2, secondRestored.identifier);
}
Use of org.apache.tez.runtime.api.events.InputUpdatePayloadEvent in the Apache Tez project.
Class MRInputSplitDistributor, method initialize.
/**
 * Builds the initializer events from the MRInput user payload.
 *
 * <p>The payload is parsed, its splits are stored in {@code splitsProto}, and a copy of the
 * payload with the splits cleared is emitted first as an InputUpdatePayloadEvent. One
 * InputDataInformationEvent per split follows, with the split's position as its index. Depending
 * on MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, each event carries either the serialized
 * split or the split object reconstructed through the new or old MapReduce API.
 *
 * @return the payload update event followed by the per-split data information events
 * @throws IOException declared by the interface; may be raised while parsing the payload or
 *         reconstructing a split
 */
@Override
public List<Event> initialize() throws IOException {
  StopWatch parseTimer = new StopWatch().start();
  MRInputUserPayloadProto userPayloadProto =
      MRInputHelpers.parseMRInputPayload(getContext().getInputUserPayload());
  parseTimer.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time to parse MRInput payload into prot: " + parseTimer.now(TimeUnit.MILLISECONDS));
  }

  Configuration conf = TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());
  boolean useNewApi = new JobConf(conf).getUseNewMapper();
  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  LOG.info("Emitting serialized splits: " + sendSerializedEvents);

  this.splitsProto = userPayloadProto.getSplits();
  int splitCount = this.splitsProto.getSplitsCount();

  // The splits travel in the per-split events below, so the updated payload omits them.
  MRInputUserPayloadProto payloadWithoutSplits =
      MRInputUserPayloadProto.newBuilder(userPayloadProto).clearSplits().build();

  // One slot for the payload update plus one per split.
  List<Event> events = Lists.newArrayListWithCapacity(splitCount + 1);
  events.add(InputUpdatePayloadEvent.create(
      payloadWithoutSplits.toByteString().asReadOnlyByteBuffer()));

  for (int index = 0; index < splitCount; index++) {
    MRSplitProto mrSplit = this.splitsProto.getSplits(index);
    InputDataInformationEvent diEvent;
    if (sendSerializedEvents) {
      // Hand the split over in serialized form, exposed as a read-only buffer.
      diEvent = InputDataInformationEvent.createWithSerializedPayload(
          index, mrSplit.toByteString().asReadOnlyByteBuffer());
    } else if (useNewApi) {
      org.apache.hadoop.mapreduce.InputSplit newInputSplit =
          MRInputUtils.getNewSplitDetailsFromEvent(mrSplit, conf);
      diEvent = InputDataInformationEvent.createWithObjectPayload(index, newInputSplit);
    } else {
      org.apache.hadoop.mapred.InputSplit oldInputSplit =
          MRInputUtils.getOldSplitDetailsFromEvent(mrSplit, conf);
      diEvent = InputDataInformationEvent.createWithObjectPayload(index, oldInputSplit);
    }
    events.add(diEvent);
  }
  return events;
}
Aggregations