Use of org.apache.tez.runtime.api.events.VertexManagerEvent in the Apache Tez project.
Class VertexImpl, method setupVertex.
/**
 * Runs the setup steps for this vertex: records the init-request time, collects group
 * input specs and the root inputs that declare an initializer class, validates the
 * configured task count, then assigns and initializes the VertexManager and replays
 * any VertexManagerEvents queued in {@code pendingVmEvents}.
 *
 * @return {@code VertexState.INITED} when every step succeeds; on failure either
 *         {@code VertexState.FAILED} or the state returned by {@code finished(...)}
 */
private VertexState setupVertex() {
  this.initTimeRequested = clock.getTime();

  // Build a GroupInputSpec for each vertex group whose merged inputs are keyed by this vertex's name.
  if (dagVertexGroups != null && !dagVertexGroups.isEmpty()) {
    List<GroupInputSpec> collectedSpecs = Lists.newLinkedList();
    for (VertexGroupInfo group : dagVertexGroups.values()) {
      if (!group.edgeMergedInputs.containsKey(getName())) {
        continue;
      }
      InputDescriptor mergedInput = group.edgeMergedInputs.get(getName());
      collectedSpecs.add(new GroupInputSpec(group.groupName, Lists.newLinkedList(group.groupMembers), mergedInput));
    }
    // The field is only replaced when at least one spec was found.
    if (!collectedSpecs.isEmpty()) {
      groupInputSpecList = collectedSpecs;
    }
  }

  // Record the names of root inputs whose controller descriptor names an initializer class.
  if (rootInputDescriptors != null) {
    LOG.info("Root Inputs exist for Vertex: " + getName() + " : " + rootInputDescriptors);
    for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> rootInput : rootInputDescriptors.values()) {
      boolean declaresInitializer = rootInput.getControllerDescriptor() != null
          && rootInput.getControllerDescriptor().getClassName() != null;
      if (!declaresInitializer) {
        continue;
      }
      if (inputsWithInitializers == null) {
        inputsWithInitializers = Sets.newHashSet();
      }
      inputsWithInitializers.add(rootInput.getName());
      LOG.info("Starting root input initializer for input: " + rootInput.getName() + ", with class: [" + rootInput.getControllerDescriptor().getClassName() + "]");
    }
  }

  // Look for at least one incoming SCATTER_GATHER edge.
  boolean hasScatterGatherSource = false;
  if (sourceVertices != null) {
    for (Edge sourceEdge : sourceVertices.values()) {
      if (DataMovementType.SCATTER_GATHER == sourceEdge.getEdgeProperty().getDataMovementType()) {
        hasScatterGatherSource = true;
        break;
      }
    }
  }

  // A SCATTER_GATHER source combined with an initializer-backed root input is rejected.
  if (hasScatterGatherSource && inputsWithInitializers != null) {
    LOG.error("A vertex with an Initial Input and a Shuffle Input are not supported at the moment");
    return finished(VertexState.FAILED);
  }

  numTasks = getVertexPlan().getTaskConfig().getNumTasks();
  // -1 and every non-negative count are accepted; anything below -1 is invalid.
  if (numTasks < -1) {
    addDiagnostic("Invalid task count for vertex, numTasks=" + numTasks);
    trySetTerminationCause(VertexTerminationCause.INVALID_NUM_OF_TASKS);
    return VertexState.FAILED;
  }

  checkTaskLimits();

  try {
    assignVertexManager();
  } catch (TezException creationFailure) {
    String msg = "Fail to create VertexManager, " + ExceptionUtils.getStackTrace(creationFailure);
    LOG.error(msg);
    return finished(VertexState.FAILED, VertexTerminationCause.INIT_FAILURE, msg);
  }

  try {
    vertexManager.initialize();
    vmIsInitialized.set(true);
    // Deliver the events that were queued in pendingVmEvents, then empty the queue.
    if (!pendingVmEvents.isEmpty()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Processing: " + pendingVmEvents.size() + " pending VMEvents for Vertex: " + logIdentifier);
      }
      for (VertexManagerEvent queuedEvent : pendingVmEvents) {
        vertexManager.onVertexManagerEventReceived(queuedEvent);
      }
      pendingVmEvents.clear();
    }
  } catch (AMUserCodeException userCodeFailure) {
    String msg = "Exception in " + userCodeFailure.getSource() + ", vertex:" + logIdentifier;
    LOG.error(msg, userCodeFailure);
    String diagnostics = msg + ", " + userCodeFailure.getMessage() + ", " + ExceptionUtils.getStackTrace(userCodeFailure.getCause());
    finished(VertexState.FAILED, VertexTerminationCause.AM_USERCODE_FAILURE, diagnostics);
    return VertexState.FAILED;
  }

  return VertexState.INITED;
}
Use of org.apache.tez.runtime.api.events.VertexManagerEvent in the Apache Tez project.
Class TestOutput, method close.
/**
 * Builds the events emitted when this output is closed: one DataMovementEvent per
 * physical output, all carrying the same 4-byte encoding of {@code output}, followed by
 * a single VertexManagerEvent addressed to the destination vertex with a record count of 1.
 * Also increments the {@code COUNTER_NAME} counter by one.
 *
 * @return the data movement events in physical-output order, then the vertex manager event
 * @throws Exception propagated from the context or event factory calls
 */
@Override
public List<Event> close() throws Exception {
  LOG.info("Sending data movement event with value: " + output);
  getContext().getCounters().findCounter(COUNTER_NAME, COUNTER_NAME).increment(1);

  // Write the value, then flip so the buffer is positioned for reading.
  ByteBuffer payload = ByteBuffer.allocate(4);
  payload.putInt(output);
  payload.flip();

  List<Event> closeEvents = Lists.newArrayListWithCapacity(getNumPhysicalOutputs());
  for (int outputIndex = 0; outputIndex < getNumPhysicalOutputs(); outputIndex++) {
    closeEvents.add(DataMovementEvent.create(outputIndex, payload));
  }

  ShuffleUserPayloads.VertexManagerEventPayloadProto vmPayload =
      ShuffleUserPayloads.VertexManagerEventPayloadProto.newBuilder().setNumRecord(1).build();
  closeEvents.add(VertexManagerEvent.create(getContext().getDestinationVertexName(), vmPayload.toByteString().asReadOnlyByteBuffer()));
  return closeEvents;
}
Use of org.apache.tez.runtime.api.events.VertexManagerEvent in the Apache Tez project.
Class TestShuffleUtils, method testGenerateOnSpillEvent_With_FinalMerge.
/**
 * Checks the events produced by {@code ShuffleUtils.generateEventOnSpill} when final merge
 * is enabled: a VertexManagerEvent followed by one CompositeDataMovementEvent that covers
 * all physical outputs, carries no spill details, and reports five empty partitions.
 */
@Test
public void testGenerateOnSpillEvent_With_FinalMerge() throws Exception {
  List<Event> events = Lists.newLinkedList();
  Path indexFile = createIndexFile(10, false);
  boolean finalMergeEnabled = true;
  boolean isLastEvent = true;
  int spillId = 0;
  int physicalOutputs = 10;
  String pathComponent = "/attempt_x_y_0/file.out";
  String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);

  // normal code path where we do final merge all the time
  ShuffleUtils.generateEventOnSpill(events, finalMergeEnabled, isLastEvent, outputContext, spillId, new TezSpillRecord(indexFile, conf), physicalOutputs, true, pathComponent, null, false, auxiliaryService, TezCommonUtils.newBestCompressionDeflater());

  // Exactly two events: the VertexManagerEvent first, then the CompositeDataMovementEvent.
  // assertEquals is used so that a failure reports the actual value.
  Assert.assertEquals(2, events.size());
  Assert.assertTrue(events.get(0) instanceof VertexManagerEvent);
  Assert.assertTrue(events.get(1) instanceof CompositeDataMovementEvent);

  CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
  Assert.assertEquals(physicalOutputs, cdme.getCount());
  Assert.assertEquals(0, cdme.getSourceIndexStart());

  ShuffleUserPayloads.DataMovementEventPayloadProto dmeProto = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
  // With final merge, spill details should not be present
  Assert.assertFalse(dmeProto.hasSpillId());
  Assert.assertFalse(dmeProto.hasLastEvent() || dmeProto.getLastEvent());

  byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(dmeProto.getEmptyPartitions());
  BitSet emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
  Assert.assertTrue("emptyPartitionBitSet cardinality (expecting 5) = " + emptyPartitionsBitSet.cardinality(), emptyPartitionsBitSet.cardinality() == 5);
}
Use of org.apache.tez.runtime.api.events.VertexManagerEvent in the Apache Tez project.
Class TestFairShuffleVertexManager, method testSchedulingWithPartitionStats.
/**
 * Drives a FairShuffleVertexManager for destination vertex "R2", which is fed by three
 * source vertices: "R1" over a SCATTER_GATHER edge and "M2" / "M3" over BROADCAST edges.
 * The destination and the two broadcast sources report 3 tasks each; R1 reports
 * {@code numTasks}. The test verifies that nothing is scheduled until all sources have
 * reported, and then checks the edge manager installed through reconfigureVertex.
 *
 * @param fairRoutingType routing type passed to createFairShuffleVertexManager
 * @param numTasks number of tasks reported for source vertex R1
 * @param partitionStats partition statistics carried by each VertexManagerEvent sent for R1
 * @param numCompletedEvents number of R1 task completions to deliver, each followed by a
 *        VertexManagerEvent
 * @param expectedScheduledTasks expected number of scheduled tasks once all sources reported
 * @param expectedNumDestinationConsumerTasks expected getNumDestinationConsumerTasks value
 *        for every R1 source task
 * @param newEdgeManagers map handed to reconfigVertexAnswer; must hold exactly one edge
 *        manager when the scenario finishes
 */
private void testSchedulingWithPartitionStats(FairRoutingType fairRoutingType, int numTasks, long[] partitionStats, int numCompletedEvents, int expectedScheduledTasks, int expectedNumDestinationConsumerTasks, Map<String, EdgeManagerPlugin> newEdgeManagers) throws Exception {
  Configuration conf = new Configuration();
  FairShuffleVertexManager manager;
  HashMap<String, EdgeProperty> mockInputVertices = new HashMap<String, EdgeProperty>();
  String r1 = "R1";
  final int numOfTasksInr1 = numTasks;
  EdgeProperty eProp1 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
  String m2 = "M2";
  final int numOfTasksInM2 = 3;
  EdgeProperty eProp2 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
  String m3 = "M3";
  final int numOfTasksInM3 = 3;
  EdgeProperty eProp3 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
  final String mockManagedVertexId = "R2";
  final int numOfTasksInDestination = 3;
  mockInputVertices.put(r1, eProp1);
  mockInputVertices.put(m2, eProp2);
  mockInputVertices.put(m3, eProp3);

  final VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class);
  when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices);
  when(mockContext.getVertexName()).thenReturn(mockManagedVertexId);
  when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(numOfTasksInDestination);
  when(mockContext.getVertexNumTasks(r1)).thenReturn(numOfTasksInr1);
  when(mockContext.getVertexNumTasks(m2)).thenReturn(numOfTasksInM2);
  when(mockContext.getVertexNumTasks(m3)).thenReturn(numOfTasksInM3);

  // Capture scheduled task indices and route reconfigureVertex calls into newEdgeManagers.
  final List<Integer> scheduledTasks = Lists.newLinkedList();
  doAnswer(new ScheduledTasksAnswer(scheduledTasks)).when(mockContext).scheduleTasks(anyList());
  doAnswer(new reconfigVertexAnswer(mockContext, mockManagedVertexId, newEdgeManagers)).when(mockContext).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());

  // check initialization
  manager = createFairShuffleVertexManager(conf, mockContext, fairRoutingType, 1000 * MB, 0.001f, 0.001f);
  manager.onVertexStarted(emptyCompletions);
  Assert.assertTrue(manager.bipartiteSources == 1);
  manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(m2, VertexState.CONFIGURED));

  // no tasks scheduled
  Assert.assertEquals(numOfTasksInDestination, manager.pendingTasks.size());
  Assert.assertEquals(numOfTasksInr1, manager.totalNumBipartiteSourceTasks);
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);

  for (int i = 0; i < numCompletedEvents; i++) {
    VertexManagerEvent vmEvent = getVertexManagerEvent(partitionStats, 0, r1, true);
    manager.onSourceTaskCompleted(vmEvent.getProducerAttemptIdentifier());
    // send VM event
    manager.onVertexManagerEventReceived(vmEvent);
  }

  // Send an event for m2.
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m2, 0));
  // no tasks scheduled
  Assert.assertEquals(numOfTasksInDestination, manager.pendingTasks.size());
  Assert.assertEquals(numOfTasksInr1, manager.totalNumBipartiteSourceTasks);

  // Send an event for m3.
  manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED));
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m3, 0));
  // all tasks scheduled
  Assert.assertEquals(0, manager.pendingTasks.size());
  Assert.assertEquals(expectedScheduledTasks, scheduledTasks.size());

  Assert.assertEquals(1, newEdgeManagers.size());
  EdgeManagerPluginOnDemand edgeManager = (EdgeManagerPluginOnDemand) newEdgeManagers.values().iterator().next();

  // Check every R1 source task, not just task 0: the number of physical outputs is
  // the same as original number of partitions, and the consumer-task count matches
  // the caller's expectation.
  for (int sourceTaskIndex = 0; sourceTaskIndex < numOfTasksInr1; sourceTaskIndex++) {
    Assert.assertEquals(numOfTasksInDestination, edgeManager.getNumSourceTaskPhysicalOutputs(sourceTaskIndex));
    Assert.assertEquals(expectedNumDestinationConsumerTasks, edgeManager.getNumDestinationConsumerTasks(sourceTaskIndex));
  }
}
Use of org.apache.tez.runtime.api.events.VertexManagerEvent in the Apache Tez project.
Class TestShuffleVertexManager, method testSchedulingWithPartitionStats.
/**
 * Verifies that a shuffle vertex manager with one SCATTER_GATHER source ("R1") and two
 * BROADCAST sources ("M2", "M3") schedules nothing until every source has reported, and
 * then schedules the three destination tasks in the order 2, 0, 1 based on the partition
 * statistics delivered through VertexManagerEvents.
 */
@Test(timeout = 5000)
public void testSchedulingWithPartitionStats() throws IOException {
  Configuration conf = new Configuration();
  ShuffleVertexManagerBase manager;
  HashMap<String, EdgeProperty> mockInputVertices = new HashMap<String, EdgeProperty>();
  String r1 = "R1";
  EdgeProperty eProp1 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
  String m2 = "M2";
  EdgeProperty eProp2 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
  String m3 = "M3";
  EdgeProperty eProp3 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
  final String mockManagedVertexId = "R2";
  mockInputVertices.put(r1, eProp1);
  mockInputVertices.put(m2, eProp2);
  mockInputVertices.put(m3, eProp3);

  VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class);
  when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices);
  when(mockContext.getVertexName()).thenReturn(mockManagedVertexId);
  when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(3);
  when(mockContext.getVertexNumTasks(r1)).thenReturn(3);
  when(mockContext.getVertexNumTasks(m2)).thenReturn(3);
  when(mockContext.getVertexNumTasks(m3)).thenReturn(3);

  // Capture the indices passed to scheduleTasks.
  final List<Integer> scheduledTasks = Lists.newLinkedList();
  doAnswer(new ScheduledTasksAnswer(scheduledTasks)).when(mockContext).scheduleTasks(anyList());

  // check initialization
  manager = createManager(conf, mockContext, 0.001f, 0.001f);
  manager.onVertexStarted(emptyCompletions);
  Assert.assertTrue(manager.bipartiteSources == 1);
  manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED));
  manager.onVertexStateUpdated(new VertexStateUpdate(m2, VertexState.CONFIGURED));
  // no tasks scheduled
  Assert.assertEquals(3, manager.pendingTasks.size());
  Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks);
  Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);

  // Send an event for r1.
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(r1, 0));
  // no tasks scheduled
  Assert.assertEquals(3, manager.pendingTasks.size());
  Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks);

  // Tasks should be scheduled in task 2, 0, 1 order
  long[] sizes = new long[] { 100 * 1000L * 1000L, 0L, 5000 * 1000L * 1000L };
  VertexManagerEvent vmEvent = getVertexManagerEvent(sizes, 1060000000, r1);
  // send VM event
  manager.onVertexManagerEventReceived(vmEvent);

  // A second stats event, again for r1, this time reporting only empty partitions.
  sizes = new long[] { 0L, 0L, 0L };
  vmEvent = getVertexManagerEvent(sizes, 1060000000, r1);
  // send VM event
  manager.onVertexManagerEventReceived(vmEvent);

  // Send an event for m2.
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m2, 0));
  // no tasks scheduled
  Assert.assertEquals(3, manager.pendingTasks.size());
  Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks);

  // Send an event for m3.
  manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED));
  manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m3, 0));
  // all tasks scheduled
  Assert.assertEquals(0, manager.pendingTasks.size());
  Assert.assertEquals(3, scheduledTasks.size());

  // Order of scheduling should be 2,0,1 based on the available partition statistics.
  // intValue() avoids the ambiguous assertEquals(int, Integer) overload.
  Assert.assertEquals(2, scheduledTasks.get(0).intValue());
  Assert.assertEquals(0, scheduledTasks.get(1).intValue());
  Assert.assertEquals(1, scheduledTasks.get(2).intValue());
}
Aggregations