Search in sources :

Example 21 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class VertexImpl method setupVertex.

/**
 * Performs the setup steps for this vertex, in order: records the time initialization was
 * requested, collects group input specs that target this vertex, records which root inputs
 * declare an initializer, validates the configured task count, then creates and initializes
 * the vertex manager and replays any VertexManagerEvents that were queued beforehand.
 *
 * @return {@link VertexState#INITED} when every step succeeds; {@link VertexState#FAILED}
 *         (either directly or as the result of {@code finished(...)}) when validation, vertex
 *         manager creation, or vertex manager initialization fails
 */
private VertexState setupVertex() {
    this.initTimeRequested = clock.getTime();
    // Collect a GroupInputSpec for every vertex group whose merged inputs are keyed by this
    // vertex's name.
    // NOTE(review): the comment originally here was truncated ("... VertexManager for
    // handling."); its full intent is not recoverable from this excerpt - confirm upstream.
    if (dagVertexGroups != null && !dagVertexGroups.isEmpty()) {
        List<GroupInputSpec> groupSpecList = Lists.newLinkedList();
        for (VertexGroupInfo groupInfo : dagVertexGroups.values()) {
            if (groupInfo.edgeMergedInputs.containsKey(getName())) {
                InputDescriptor mergedInput = groupInfo.edgeMergedInputs.get(getName());
                // The group member list is copied into a fresh list for the spec.
                groupSpecList.add(new GroupInputSpec(groupInfo.groupName, Lists.newLinkedList(groupInfo.groupMembers), mergedInput));
            }
        }
        // groupInputSpecList is only assigned when at least one spec was collected.
        if (!groupSpecList.isEmpty()) {
            groupInputSpecList = groupSpecList;
        }
    }
    // Check if any inputs need initializers
    if (rootInputDescriptors != null) {
        LOG.info("Root Inputs exist for Vertex: " + getName() + " : " + rootInputDescriptors);
        for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> input : rootInputDescriptors.values()) {
            // An input counts as "having an initializer" when its controller descriptor names a class.
            if (input.getControllerDescriptor() != null && input.getControllerDescriptor().getClassName() != null) {
                // The set is created lazily, so it stays null when no input has an initializer.
                if (inputsWithInitializers == null) {
                    inputsWithInitializers = Sets.newHashSet();
                }
                inputsWithInitializers.add(input.getName());
                // Despite the log wording, this method only records the input name here;
                // nothing in this method starts an initializer.
                LOG.info("Starting root input initializer for input: " + input.getName() + ", with class: [" + input.getControllerDescriptor().getClassName() + "]");
            }
        }
    }
    // Detect whether any incoming edge uses SCATTER_GATHER data movement.
    boolean hasBipartite = false;
    if (sourceVertices != null) {
        for (Edge edge : sourceVertices.values()) {
            if (edge.getEdgeProperty().getDataMovementType() == DataMovementType.SCATTER_GATHER) {
                hasBipartite = true;
                break;
            }
        }
    }
    // A vertex that has both a SCATTER_GATHER source edge and an input with an initializer
    // is rejected.
    if (hasBipartite && inputsWithInitializers != null) {
        LOG.error("A vertex with an Initial Input and a Shuffle Input are not supported at the moment");
        return finished(VertexState.FAILED);
    }
    numTasks = getVertexPlan().getTaskConfig().getNumTasks();
    // Only -1 and non-negative counts are accepted, i.e. anything below -1 fails the vertex.
    // NOTE(review): -1 presumably means "task count not yet determined" - confirm.
    if (!(numTasks == -1 || numTasks >= 0)) {
        addDiagnostic("Invalid task count for vertex" + ", numTasks=" + numTasks);
        trySetTerminationCause(VertexTerminationCause.INVALID_NUM_OF_TASKS);
        // NOTE(review): unlike the other failure paths, this one returns FAILED without
        // calling finished(...) - confirm that is intentional.
        return VertexState.FAILED;
    }
    checkTaskLimits();
    // Create the vertex manager only after the checks above have passed.
    // NOTE(review): the comment originally here was truncated ("... reset to -1 after the
    // restore."); it presumably explained why the manager is assigned at this point - confirm
    // upstream.
    try {
        assignVertexManager();
    } catch (TezException e1) {
        String msg = "Fail to create VertexManager, " + ExceptionUtils.getStackTrace(e1);
        LOG.error(msg);
        return finished(VertexState.FAILED, VertexTerminationCause.INIT_FAILURE, msg);
    }
    try {
        vertexManager.initialize();
        // Flag that the vertex manager has been initialized before replaying queued events.
        vmIsInitialized.set(true);
        // Deliver the events held in pendingVmEvents, then empty the queue.
        if (!pendingVmEvents.isEmpty()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Processing: " + pendingVmEvents.size() + " pending VMEvents for Vertex: " + logIdentifier);
            }
            for (VertexManagerEvent vmEvent : pendingVmEvents) {
                vertexManager.onVertexManagerEventReceived(vmEvent);
            }
            pendingVmEvents.clear();
        }
    } catch (AMUserCodeException e) {
        String msg = "Exception in " + e.getSource() + ", vertex:" + logIdentifier;
        LOG.error(msg, e);
        // The return value of finished(...) is discarded here; FAILED is returned explicitly.
        finished(VertexState.FAILED, VertexTerminationCause.AM_USERCODE_FAILURE, msg + ", " + e.getMessage() + ", " + ExceptionUtils.getStackTrace(e.getCause()));
        return VertexState.FAILED;
    }
    return VertexState.INITED;
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) TezException(org.apache.tez.dag.api.TezException) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) VertexGroupInfo(org.apache.tez.dag.app.dag.impl.DAGImpl.VertexGroupInfo) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) GroupInputSpec(org.apache.tez.runtime.api.impl.GroupInputSpec)

Example 22 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class TestOutput method close.

/**
 * Closes this output: bumps the test counter and emits one {@link DataMovementEvent} per
 * physical output, each carrying the 4-byte encoding of {@code output}, followed by a single
 * {@link VertexManagerEvent} addressed to the destination vertex.
 *
 * @return the data movement events in physical-output order, then the vertex manager event
 * @throws Exception declared by the overridden method
 */
@Override
public List<Event> close() throws Exception {
    LOG.info("Sending data movement event with value: " + output);
    getContext().getCounters().findCounter(COUNTER_NAME, COUNTER_NAME).increment(1);

    // Encode the value once and flip the buffer so it is ready for reading.
    ByteBuffer payload = ByteBuffer.allocate(4);
    payload.putInt(output);
    payload.flip();

    // Every physical output receives an event backed by the same payload buffer.
    List<Event> outgoing = Lists.newArrayListWithCapacity(getNumPhysicalOutputs());
    int outputIndex = 0;
    while (outputIndex < getNumPhysicalOutputs()) {
        outgoing.add(DataMovementEvent.create(outputIndex, payload));
        outputIndex++;
    }

    // Report a record count of 1 to the destination vertex's manager.
    ByteBuffer statsPayload = ShuffleUserPayloads.VertexManagerEventPayloadProto.newBuilder()
            .setNumRecord(1)
            .build()
            .toByteString()
            .asReadOnlyByteBuffer();
    outgoing.add(VertexManagerEvent.create(getContext().getDestinationVertexName(), statsPayload));
    return outgoing;
}
Also used : VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) Event(org.apache.tez.runtime.api.Event) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) ByteBuffer(java.nio.ByteBuffer)

Example 23 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class TestShuffleUtils method testGenerateOnSpillEvent_With_FinalMerge.

/**
 * Verifies the events produced by {@code ShuffleUtils.generateEventOnSpill} when final merge
 * is enabled: exactly one {@link VertexManagerEvent} followed by one
 * {@link CompositeDataMovementEvent} that covers every physical output and carries no
 * spill details.
 *
 * @throws Exception if creating the index file or generating/parsing the events fails
 */
@Test
public void testGenerateOnSpillEvent_With_FinalMerge() throws Exception {
    List<Event> events = Lists.newLinkedList();
    Path indexFile = createIndexFile(10, false);
    boolean finalMergeEnabled = true;
    boolean isLastEvent = true;
    int spillId = 0;
    int physicalOutputs = 10;
    String pathComponent = "/attempt_x_y_0/file.out";
    String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
    // normal code path where we do final merge all the time
    ShuffleUtils.generateEventOnSpill(events, finalMergeEnabled, isLastEvent, outputContext, spillId, new TezSpillRecord(indexFile, conf), physicalOutputs, true, pathComponent, null, false, auxiliaryService, TezCommonUtils.newBestCompressionDeflater());
    // Two events are expected: one for the VM, one composite data movement event.
    // assertEquals (rather than assertTrue(a == b)) reports expected/actual values on failure.
    Assert.assertEquals(2, events.size());
    Assert.assertTrue(events.get(0) instanceof VertexManagerEvent);
    Assert.assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
    // The composite event must span all physical outputs, starting at index 0.
    Assert.assertEquals(physicalOutputs, cdme.getCount());
    Assert.assertEquals(0, cdme.getSourceIndexStart());
    ShuffleUserPayloads.DataMovementEventPayloadProto dmeProto = ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    // With final merge, spill details should not be present
    Assert.assertFalse(dmeProto.hasSpillId());
    Assert.assertFalse(dmeProto.hasLastEvent() || dmeProto.getLastEvent());
    // The empty-partition bitmap is stored compressed inside the payload.
    byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(dmeProto.getEmptyPartitions());
    BitSet emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
    Assert.assertTrue("emptyPartitionBitSet cardinality (expecting 5) = " + emptyPartitionsBitSet.cardinality(), emptyPartitionsBitSet.cardinality() == 5);
}
Also used : Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) ByteString(com.google.protobuf.ByteString) Mockito.anyString(org.mockito.Mockito.anyString) TezSpillRecord(org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) ShuffleUserPayloads(org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads) Test(org.junit.Test)

Example 24 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class TestFairShuffleVertexManager method testSchedulingWithPartitionStats.

// Creates a DAG with one destination vertex (R2) connected to 3 source vertices.
// R1 has numTasks tasks and feeds R2 over a SCATTER_GATHER edge; M2 and M3 have 3 tasks
// each and feed R2 over BROADCAST edges. R2 itself has 3 tasks.
//
// The test sends numCompletedEvents completion + VertexManagerEvent pairs for R1 (each
// carrying partitionStats), then completes one task of M2 and M3, and finally checks that
// expectedScheduledTasks tasks were scheduled and that the single reconfigured edge manager
// (captured in newEdgeManagers) reports expectedNumDestinationConsumerTasks consumers for
// every R1 task.
private void testSchedulingWithPartitionStats(FairRoutingType fairRoutingType, int numTasks, long[] partitionStats, int numCompletedEvents, int expectedScheduledTasks, int expectedNumDestinationConsumerTasks, Map<String, EdgeManagerPlugin> newEdgeManagers) throws Exception {
    Configuration conf = new Configuration();
    FairShuffleVertexManager manager;
    HashMap<String, EdgeProperty> mockInputVertices = new HashMap<String, EdgeProperty>();
    String r1 = "R1";
    final int numOfTasksInr1 = numTasks;
    EdgeProperty eProp1 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
    String m2 = "M2";
    final int numOfTasksInM2 = 3;
    EdgeProperty eProp2 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
    String m3 = "M3";
    final int numOfTasksInM3 = 3;
    EdgeProperty eProp3 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
    final String mockManagedVertexId = "R2";
    final int numOfTasksInDestination = 3;
    mockInputVertices.put(r1, eProp1);
    mockInputVertices.put(m2, eProp2);
    mockInputVertices.put(m3, eProp3);
    final VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class);
    when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices);
    when(mockContext.getVertexName()).thenReturn(mockManagedVertexId);
    when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(numOfTasksInDestination);
    when(mockContext.getVertexNumTasks(r1)).thenReturn(numOfTasksInr1);
    when(mockContext.getVertexNumTasks(m2)).thenReturn(numOfTasksInM2);
    when(mockContext.getVertexNumTasks(m3)).thenReturn(numOfTasksInM3);
    // Capture scheduled task indices and any edge managers installed by reconfigureVertex.
    final List<Integer> scheduledTasks = Lists.newLinkedList();
    doAnswer(new ScheduledTasksAnswer(scheduledTasks)).when(mockContext).scheduleTasks(anyList());
    doAnswer(new reconfigVertexAnswer(mockContext, mockManagedVertexId, newEdgeManagers)).when(mockContext).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
    // check initialization
    manager = createFairShuffleVertexManager(conf, mockContext, fairRoutingType, 1000 * MB, 0.001f, 0.001f);
    manager.onVertexStarted(emptyCompletions);
    Assert.assertEquals(1, manager.bipartiteSources);
    manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED));
    manager.onVertexStateUpdated(new VertexStateUpdate(m2, VertexState.CONFIGURED));
    // no tasks scheduled
    Assert.assertEquals(numOfTasksInDestination, manager.pendingTasks.size());
    Assert.assertEquals(numOfTasksInr1, manager.totalNumBipartiteSourceTasks);
    Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
    for (int i = 0; i < numCompletedEvents; i++) {
        VertexManagerEvent vmEvent = getVertexManagerEvent(partitionStats, 0, r1, true);
        manager.onSourceTaskCompleted(vmEvent.getProducerAttemptIdentifier());
        // send VM event
        manager.onVertexManagerEventReceived(vmEvent);
    }
    // Send an event for m2.
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m2, 0));
    // no tasks scheduled
    Assert.assertEquals(numOfTasksInDestination, manager.pendingTasks.size());
    Assert.assertEquals(numOfTasksInr1, manager.totalNumBipartiteSourceTasks);
    // Send an event for m3.
    manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED));
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m3, 0));
    // all tasks scheduled
    Assert.assertEquals(0, manager.pendingTasks.size());
    Assert.assertEquals(expectedScheduledTasks, scheduledTasks.size());
    // Exactly one edge (the SCATTER_GATHER one) should have been given a new edge manager.
    Assert.assertEquals(1, newEdgeManagers.size());
    EdgeManagerPluginOnDemand edgeManager = (EdgeManagerPluginOnDemand) newEdgeManagers.values().iterator().next();
    // Every source task must expose as many physical outputs as the destination has tasks.
    // The loop index is passed through so each source task is actually checked.
    for (int i = 0; i < numOfTasksInr1; i++) {
        Assert.assertEquals(numOfTasksInDestination, edgeManager.getNumSourceTaskPhysicalOutputs(i));
    }
    for (int sourceTaskIndex = 0; sourceTaskIndex < numOfTasksInr1; sourceTaskIndex++) {
        Assert.assertEquals(expectedNumDestinationConsumerTasks, edgeManager.getNumDestinationConsumerTasks(sourceTaskIndex));
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) VertexManagerPluginContext(org.apache.tez.dag.api.VertexManagerPluginContext) HashMap(java.util.HashMap) EdgeManagerPluginOnDemand(org.apache.tez.dag.api.EdgeManagerPluginOnDemand) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) VertexStateUpdate(org.apache.tez.dag.api.event.VertexStateUpdate) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint)

Example 25 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class TestShuffleVertexManager method testSchedulingWithPartitionStats.

/**
 * Verifies that, once partition statistics have been reported for the SCATTER_GATHER source
 * (R1), the destination's three tasks are scheduled in the order 2, 0, 1. No task may be
 * scheduled until the BROADCAST sources (M2, M3) have each reported a completed task.
 *
 * @throws IOException declared because the helpers used to build the events may throw it
 */
@Test(timeout = 5000)
public void testSchedulingWithPartitionStats() throws IOException {
    Configuration conf = new Configuration();
    ShuffleVertexManagerBase manager;
    HashMap<String, EdgeProperty> mockInputVertices = new HashMap<String, EdgeProperty>();
    String r1 = "R1";
    EdgeProperty eProp1 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
    String m2 = "M2";
    EdgeProperty eProp2 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
    String m3 = "M3";
    EdgeProperty eProp3 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in"));
    final String mockManagedVertexId = "R2";
    mockInputVertices.put(r1, eProp1);
    mockInputVertices.put(m2, eProp2);
    mockInputVertices.put(m3, eProp3);
    VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class);
    when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices);
    when(mockContext.getVertexName()).thenReturn(mockManagedVertexId);
    when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(3);
    when(mockContext.getVertexNumTasks(r1)).thenReturn(3);
    when(mockContext.getVertexNumTasks(m2)).thenReturn(3);
    when(mockContext.getVertexNumTasks(m3)).thenReturn(3);
    // Capture the task indices in the order they are handed to scheduleTasks.
    final List<Integer> scheduledTasks = Lists.newLinkedList();
    doAnswer(new ScheduledTasksAnswer(scheduledTasks)).when(mockContext).scheduleTasks(anyList());
    // check initialization
    manager = createManager(conf, mockContext, 0.001f, 0.001f);
    manager.onVertexStarted(emptyCompletions);
    Assert.assertEquals(1, manager.bipartiteSources);
    manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED));
    manager.onVertexStateUpdated(new VertexStateUpdate(m2, VertexState.CONFIGURED));
    // no tasks scheduled
    Assert.assertEquals(3, manager.pendingTasks.size());
    Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks);
    Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
    // Send an event for r1.
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(r1, 0));
    // no tasks scheduled
    Assert.assertEquals(3, manager.pendingTasks.size());
    Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks);
    // Tasks should be scheduled in task 2, 0, 1 order
    // (uppercase L suffix: a lowercase l is easily misread as the digit 1)
    long[] sizes = new long[] { 100 * 1000L * 1000L, 0L, 5000 * 1000L * 1000L };
    VertexManagerEvent vmEvent = getVertexManagerEvent(sizes, 1060000000, r1);
    // send VM event
    manager.onVertexManagerEventReceived(vmEvent);
    // a second stats event for r1, this time with all-empty partition stats
    sizes = new long[] { 0L, 0L, 0L };
    vmEvent = getVertexManagerEvent(sizes, 1060000000, r1);
    // send VM event
    manager.onVertexManagerEventReceived(vmEvent);
    // Send an event for m2.
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m2, 0));
    // no tasks scheduled
    Assert.assertEquals(3, manager.pendingTasks.size());
    Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks);
    // Send an event for m3.
    manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED));
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m3, 0));
    // all tasks scheduled
    Assert.assertEquals(0, manager.pendingTasks.size());
    Assert.assertEquals(3, scheduledTasks.size());
    // Order of scheduling should be 2,0,1 based on the available partition statistics.
    // intValue() unboxes explicitly so the call resolves to assertEquals(long, long).
    Assert.assertEquals(2, scheduledTasks.get(0).intValue());
    Assert.assertEquals(0, scheduledTasks.get(1).intValue());
    Assert.assertEquals(1, scheduledTasks.get(2).intValue());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) VertexStateUpdate(org.apache.tez.dag.api.event.VertexStateUpdate) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) Test(org.junit.Test)

Aggregations

VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent)30 Test (org.junit.Test)15 ByteString (com.google.protobuf.ByteString)12 Configuration (org.apache.hadoop.conf.Configuration)12 CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent)12 Event (org.apache.tez.runtime.api.Event)10 BitSet (java.util.BitSet)8 HashMap (java.util.HashMap)8 VertexStateUpdate (org.apache.tez.dag.api.event.VertexStateUpdate)8 ByteBuffer (java.nio.ByteBuffer)6 Path (org.apache.hadoop.fs.Path)6 VertexManagerPluginContext (org.apache.tez.dag.api.VertexManagerPluginContext)6 DataMovementEventPayloadProto (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto)5 Configurable (org.apache.hadoop.conf.Configurable)4 CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)4 DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec)4 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)4 TezCounter (org.apache.tez.common.counters.TezCounter)4 TezCounters (org.apache.tez.common.counters.TezCounters)4 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)4