Search in sources :

Example 6 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class TestOnFileSortedOutput method baseTest.

// Writes a random batch of keys (each with a random number of values) to the
// sorted output, closes it, and checks the two events it returns: the record
// count, the partition-stats flag on the VertexManagerEvent payload, and the
// host / port / path component on the data movement payload.
@Test(timeout = 5000)
public void baseTest() throws Exception {
    startSortedOutput(partitions);
    // Write random set of keys
    long expectedRecords = numRecords.get();
    int keyIndex = 0;
    // The upper bound is drawn again on every check of the loop condition.
    while (keyIndex < Math.max(1, rnd.nextInt(50))) {
        Text key = new Text(new BigInteger(256, rnd).toString());
        // NOTE(review): raw type presumably matches what writer.write accepts - confirm before adding generics.
        LinkedList values = new LinkedList();
        int valueIndex = 0;
        while (valueIndex < Math.max(2, rnd.nextInt(10))) {
            expectedRecords++;
            values.add(new Text(new BigInteger(256, rnd).toString()));
            valueIndex++;
        }
        writer.write(key, values);
        keyIndex++;
    }
    List<Event> eventList = sortedOutput.close();
    // Exactly two events are expected from close().
    assertTrue(eventList != null);
    assertTrue(eventList.size() == 2);
    assertEquals(expectedRecords, numRecords.get());
    // Second event carries the shuffle (data movement) payload.
    CompositeDataMovementEvent dataMovementEvent = (CompositeDataMovementEvent) eventList.get(1);
    ShuffleUserPayloads.DataMovementEventPayloadProto payload =
            ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(dataMovementEvent.getUserPayload()));
    // First event carries the vertex manager payload with the partition stats.
    VertexManagerEvent vertexManagerEvent = (VertexManagerEvent) eventList.get(0);
    ShuffleUserPayloads.VertexManagerEventPayloadProto vmPayload =
            ShuffleUserPayloads.VertexManagerEventPayloadProto.parseFrom(ByteString.copyFrom(vertexManagerEvent.getUserPayload()));
    // Precise reporting must produce detailed stats; otherwise the plain stats field is expected.
    boolean hasExpectedStats = reportPartitionStats.isPrecise()
            ? vmPayload.hasDetailedPartitionStats()
            : vmPayload.hasPartitionStats();
    assertTrue(hasExpectedStats);
    assertEquals(HOST, payload.getHost());
    assertEquals(PORT, payload.getPort());
    assertEquals(UniqueID, payload.getPathComponent());
}
Also used: VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent), CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent), BigInteger (java.math.BigInteger), Event (org.apache.tez.runtime.api.Event), Text (org.apache.hadoop.io.Text), LinkedList (java.util.LinkedList), ShuffleUserPayloads (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads), Test (org.junit.Test)

Example 7 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class TestOrderedPartitionedKVOutput2 method testNonStartedOutput.

// Closes an OrderedPartitionedKVOutput that was initialized but never started
// and checks that it still returns a VertexManagerEvent followed by a
// CompositeDataMovementEvent whose payload flags every partition as empty.
@Test(timeout = 5000)
public void testNonStartedOutput() throws IOException {
    final OutputContext outputContext = OutputTestHelpers.createOutputContext(conf, workingDir);
    final int numPartitions = 10;
    final OrderedPartitionedKVOutput output = new OrderedPartitionedKVOutput(outputContext, numPartitions);
    output.initialize();

    final List<Event> events = output.close();
    assertEquals(2, events.size());
    final Event vertexManagerEvent = events.get(0);
    assertTrue(vertexManagerEvent instanceof VertexManagerEvent);
    final Event dataMovementEvent = events.get(1);
    assertTrue(dataMovementEvent instanceof CompositeDataMovementEvent);

    // Decode the shuffle payload carried by the data movement event.
    final ByteBuffer payloadBytes = ((CompositeDataMovementEvent) dataMovementEvent).getUserPayload();
    final ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload =
            ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(payloadBytes));
    assertTrue(shufflePayload.hasEmptyPartitions());

    // The empty-partition set is shipped compressed; expand it back into a BitSet.
    final BitSet emptyPartitions = TezUtilsInternal.fromByteArray(
            TezCommonUtils.decompressByteStringToByteArray(shufflePayload.getEmptyPartitions()));
    assertEquals(numPartitions, emptyPartitions.cardinality());
    // Every bit in [0, numPartitions) must be set, i.e. the first clear bit lies at or past numPartitions.
    assertTrue(emptyPartitions.nextClearBit(0) >= numPartitions);
}
Also used: VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent), CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent), BitSet (java.util.BitSet), Event (org.apache.tez.runtime.api.Event), ByteBuffer (java.nio.ByteBuffer), OutputContext (org.apache.tez.runtime.api.OutputContext), ShuffleUserPayloads (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads), Test (org.junit.Test)

Example 8 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class TestShuffleVertexManagerBase method testVMEventFirst.

// Delivers a vertex state update and a VertexManagerEvent to the manager
// before onVertexStarted is called, and checks that the event is only counted
// once the vertex has started.
@Test(timeout = 5000)
public void testVMEventFirst() throws IOException {
    final Configuration conf = new Configuration();
    final String sourceVertex1 = "Vertex1";
    final String sourceVertex2 = "Vertex2";
    final String sourceVertex3 = "Vertex3";
    final String managedVertex = "Vertex4";
    final List<Integer> scheduledTasks = Lists.newLinkedList();
    final VertexManagerPluginContext mockContext = createVertexManagerContext(
            sourceVertex1, 2, sourceVertex2, 2, sourceVertex3, 2, managedVertex, 4, scheduledTasks, null);
    final VertexManagerEvent vmEvent = getVertexManagerEvent(null, 1L, "Vertex");
    final ShuffleVertexManagerBase manager = createManager(conf, mockContext, 0.01f, 0.75f);

    // no tasks scheduled
    Assert.assertEquals(4, manager.pendingTasks.size());
    Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);

    // Attribute the event to an attempt of the first source vertex.
    final TezTaskAttemptID attemptId = TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_0");
    vmEvent.setProducerAttemptIdentifier(new TaskAttemptIdentifierImpl("dag", sourceVertex1, attemptId));

    manager.onVertexStateUpdated(new VertexStateUpdate(sourceVertex1, VertexState.CONFIGURED));
    manager.onVertexManagerEventReceived(vmEvent);
    // nothing happens
    Assert.assertEquals(0, manager.numVertexManagerEventsReceived);

    // now the processing happens
    manager.onVertexStarted(emptyCompletions);
    Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
}
Also used : VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) VertexStateUpdate(org.apache.tez.dag.api.event.VertexStateUpdate) Configuration(org.apache.hadoop.conf.Configuration) VertexManagerPluginContext(org.apache.tez.dag.api.VertexManagerPluginContext) TaskAttemptIdentifierImpl(org.apache.tez.dag.records.TaskAttemptIdentifierImpl) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 9 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class TestShuffleVertexManagerBase method testAutoParallelism.

// Exercises auto-parallelism: once two source tasks (one from Vertex1, one
// from Vertex2) have each reported 50 MB of output, the manager is expected to
// reconfigure the managed vertex exactly once, down to 2 tasks, install new
// edge managers, and schedule both remaining tasks. The routing of the
// installed edge manager is then checked.
@Test(timeout = 5000)
public void testAutoParallelism() throws Exception {
    Configuration conf = new Configuration();
    ShuffleVertexManagerBase manager;
    final String mockSrcVertexId1 = "Vertex1";
    final String mockSrcVertexId2 = "Vertex2";
    final String mockSrcVertexId3 = "Vertex3";
    final String mockManagedVertexId = "Vertex4";
    final List<Integer> scheduledTasks = Lists.newLinkedList();
    // Passed to the mock context; the test later expects it to hold the edge managers set during reconfiguration.
    final Map<String, EdgeManagerPlugin> newEdgeManagers = new HashMap<String, EdgeManagerPlugin>();
    final VertexManagerPluginContext mockContext = createVertexManagerContext(mockSrcVertexId1, 2, mockSrcVertexId2, 2, mockSrcVertexId3, 2, mockManagedVertexId, 4, scheduledTasks, newEdgeManagers);
    // parallelism changed due to small data size
    manager = createManager(conf, mockContext, 0.5f, 0.5f);
    manager.onVertexStarted(emptyCompletions);
    manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED));
    manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED));
    manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED));
    // no tasks scheduled
    Assert.assertEquals(4, manager.pendingTasks.size());
    Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
    // task completion from non-bipartite stage does nothing
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId3, 0));
    // no tasks scheduled
    Assert.assertEquals(4, manager.pendingTasks.size());
    Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks);
    Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
    // First completion: task 0 of Vertex1 reports 50 MB.
    VertexManagerEvent vmEvent = getVertexManagerEvent(null, 50 * MB, mockSrcVertexId1);
    manager.onVertexManagerEventReceived(vmEvent);
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
    Assert.assertEquals(4, manager.pendingTasks.size());
    // no tasks scheduled
    Assert.assertEquals(0, scheduledTasks.size());
    Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
    Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
    Assert.assertEquals(50 * MB, manager.completedSourceTasksOutputSize);
    // ignore duplicate completion
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, 0));
    Assert.assertEquals(4, manager.pendingTasks.size());
    // no tasks scheduled
    Assert.assertEquals(0, scheduledTasks.size());
    Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted);
    Assert.assertEquals(50 * MB, manager.completedSourceTasksOutputSize);
    // Second completion: task 1 of Vertex2 reports another 50 MB.
    vmEvent = getVertexManagerEvent(null, 50 * MB, mockSrcVertexId2);
    manager.onVertexManagerEventReceived(vmEvent);
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 1));
    // managedVertex tasks reduced: exactly one reconfiguration overall, and it asked for 2 tasks
    verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
    verify(mockContext, times(1)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap());
    Assert.assertEquals(2, newEdgeManagers.size());
    // TODO improve tests for parallelism
    // all tasks scheduled
    Assert.assertEquals(0, manager.pendingTasks.size());
    Assert.assertEquals(2, scheduledTasks.size());
    // Integer.valueOf replaces the deprecated boxing constructor; List.contains compares with equals().
    Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(0)));
    Assert.assertTrue(scheduledTasks.contains(Integer.valueOf(1)));
    Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted);
    Assert.assertEquals(2, manager.numVertexManagerEventsReceived);
    Assert.assertEquals(100 * MB, manager.completedSourceTasksOutputSize);
    // more completions don't cause recalculation of parallelism
    manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0));
    verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap());
    Assert.assertEquals(2, newEdgeManagers.size());
    // Inspect one of the installed edge managers (whichever the map iterates first).
    EdgeManagerPluginOnDemand edgeManager = (EdgeManagerPluginOnDemand) newEdgeManagers.values().iterator().next();
    // 4 source task outputs - same as original number of partitions
    Assert.assertEquals(4, edgeManager.getNumSourceTaskPhysicalOutputs(0));
    // 4 destination task inputs - 2 source tasks * 2 merged partitions
    Assert.assertEquals(4, edgeManager.getNumDestinationTaskPhysicalInputs(0));
    // Data movement events are each expected to map to a single target index.
    EdgeManagerPluginOnDemand.EventRouteMetadata routeMetadata = edgeManager.routeDataMovementEventToDestination(1, 1, 0);
    Assert.assertEquals(1, routeMetadata.getNumEvents());
    Assert.assertEquals(3, routeMetadata.getTargetIndices()[0]);
    routeMetadata = edgeManager.routeDataMovementEventToDestination(0, 2, 1);
    Assert.assertEquals(1, routeMetadata.getNumEvents());
    Assert.assertEquals(0, routeMetadata.getTargetIndices()[0]);
    // A failure of source task 1 is expected to map to target indices 2 and 3 for both destination tasks.
    routeMetadata = edgeManager.routeInputSourceTaskFailedEventToDestination(1, 0);
    Assert.assertEquals(2, routeMetadata.getNumEvents());
    Assert.assertEquals(2, routeMetadata.getTargetIndices()[0]);
    Assert.assertEquals(3, routeMetadata.getTargetIndices()[1]);
    routeMetadata = edgeManager.routeInputSourceTaskFailedEventToDestination(1, 1);
    Assert.assertEquals(2, routeMetadata.getNumEvents());
    Assert.assertEquals(2, routeMetadata.getTargetIndices()[0]);
    Assert.assertEquals(3, routeMetadata.getTargetIndices()[1]);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) VertexManagerPluginContext(org.apache.tez.dag.api.VertexManagerPluginContext) HashMap(java.util.HashMap) EdgeManagerPluginOnDemand(org.apache.tez.dag.api.EdgeManagerPluginOnDemand) EdgeManagerPlugin(org.apache.tez.dag.api.EdgeManagerPlugin) VertexStateUpdate(org.apache.tez.dag.api.event.VertexStateUpdate) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) Test(org.junit.Test)

Example 10 with VertexManagerEvent

use of org.apache.tez.runtime.api.events.VertexManagerEvent in project tez by apache.

the class TestShuffleVertexManagerBase method testPartitionStats.

// Test partition stats.
// Sends a VertexManagerEvent carrying four partition sizes (0, 1 MB, 964 MB,
// 48 MB) and checks the per-index stats the manager exposes, first for
// non-detailed stats (expected values 0, 1, 100, 10) and then for detailed
// stats (expected values 0, 1, 964, 48). In both modes a second event from
// another attempt of the same task must leave the stats unchanged.
@Test(timeout = 5000)
public void testPartitionStats() throws IOException {
    Configuration conf = new Configuration();
    ShuffleVertexManagerBase manager;
    final String mockSrcVertexId1 = "Vertex1";
    final String mockSrcVertexId2 = "Vertex2";
    final String mockSrcVertexId3 = "Vertex3";
    final String mockManagedVertexId = "Vertex4";
    final List<Integer> scheduledTasks = Lists.newLinkedList();
    final VertexManagerPluginContext mockContext = createVertexManagerContext(mockSrcVertexId1, 2, mockSrcVertexId2, 2, mockSrcVertexId3, 2, mockManagedVertexId, 4, scheduledTasks, null);
    // {5,9,12,18} in bitmap
    // NOTE(review): the comment above is from the original; what these bit positions refer to is not evident here - confirm.
    final long MB = 1024L * 1024L;
    long[] sizes = new long[] { 0L, 1 * MB, 964 * MB, 48 * MB };
    VertexManagerEvent vmEvent = getVertexManagerEvent(sizes, 0, "Vertex", false);
    manager = createManager(conf, mockContext, 0.01f, 0.75f);
    manager.onVertexStarted(emptyCompletions);
    // no tasks scheduled
    Assert.assertEquals(4, manager.pendingTasks.size());
    Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
    TezTaskAttemptID taId1 = TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_0");
    vmEvent.setProducerAttemptIdentifier(new TaskAttemptIdentifierImpl("dag", mockSrcVertexId1, taId1));
    manager.onVertexManagerEventReceived(vmEvent);
    Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
    // 0 MB, 1 MB, 100 MB and 10 MB buckets respectively
    assertKnownStats(manager, 0, 1, 100, 10);
    // sending again from a different version of the same task has no impact
    TezTaskAttemptID taId2 = TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_1");
    vmEvent.setProducerAttemptIdentifier(new TaskAttemptIdentifierImpl("dag", mockSrcVertexId1, taId2));
    manager.onVertexManagerEventReceived(vmEvent);
    Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
    // 0 MB, 1 MB, 100 MB and 10 MB buckets respectively
    assertKnownStats(manager, 0, 1, 100, 10);
    // Testing for detailed partition stats
    vmEvent = getVertexManagerEvent(sizes, 0, "Vertex", true);
    manager = createManager(conf, mockContext, 0.01f, 0.75f);
    manager.onVertexStarted(emptyCompletions);
    // no tasks scheduled
    Assert.assertEquals(4, manager.pendingTasks.size());
    Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted);
    taId1 = TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_0");
    vmEvent.setProducerAttemptIdentifier(new TaskAttemptIdentifierImpl("dag", mockSrcVertexId1, taId1));
    manager.onVertexManagerEventReceived(vmEvent);
    Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
    // detailed stats are expected to match the sizes in MB
    assertKnownStats(manager, 0, 1, 964, 48);
    // sending again from a different version of the same task has no impact
    taId2 = TezTaskAttemptID.fromString("attempt_1436907267600_195589_1_00_000000_1");
    vmEvent.setProducerAttemptIdentifier(new TaskAttemptIdentifierImpl("dag", mockSrcVertexId1, taId2));
    manager.onVertexManagerEventReceived(vmEvent);
    Assert.assertEquals(1, manager.numVertexManagerEventsReceived);
    assertKnownStats(manager, 0, 1, 964, 48);
}

// Asserts that manager.getCurrentlyKnownStatsAtIndex(i) equals expectedStats[i]
// for every supplied index, naming the index in the failure message.
private static void assertKnownStats(ShuffleVertexManagerBase manager, int... expectedStats) {
    for (int index = 0; index < expectedStats.length; index++) {
        Assert.assertEquals("known stats at index " + index, expectedStats[index], manager.getCurrentlyKnownStatsAtIndex(index));
    }
}
Also used : VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) Configuration(org.apache.hadoop.conf.Configuration) VertexManagerPluginContext(org.apache.tez.dag.api.VertexManagerPluginContext) TaskAttemptIdentifierImpl(org.apache.tez.dag.records.TaskAttemptIdentifierImpl) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Aggregations

VertexManagerEvent (org.apache.tez.runtime.api.events.VertexManagerEvent): 30; Test (org.junit.Test): 15; ByteString (com.google.protobuf.ByteString): 12; Configuration (org.apache.hadoop.conf.Configuration): 12; CompositeDataMovementEvent (org.apache.tez.runtime.api.events.CompositeDataMovementEvent): 12; Event (org.apache.tez.runtime.api.Event): 10; BitSet (java.util.BitSet): 8; HashMap (java.util.HashMap): 8; VertexStateUpdate (org.apache.tez.dag.api.event.VertexStateUpdate): 8; ByteBuffer (java.nio.ByteBuffer): 6; Path (org.apache.hadoop.fs.Path): 6; VertexManagerPluginContext (org.apache.tez.dag.api.VertexManagerPluginContext): 6; DataMovementEventPayloadProto (org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto): 5; Configurable (org.apache.hadoop.conf.Configurable): 4; CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 4; DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 4; ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 4; TezCounter (org.apache.tez.common.counters.TezCounter): 4; TezCounters (org.apache.tez.common.counters.TezCounters): 4; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 4