Search in sources :

Example 56 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project tez by apache.

the class TestTaskExecution2 method createTaskRunner.

private TezTaskRunner2 createTaskRunner(ApplicationId appId, TaskExecutionTestHelpers.TezTaskUmbilicalForTest umbilical, TaskReporter taskReporter, ListeningExecutorService executor, String processorClass, byte[] processorConf, boolean testRunner, boolean updateSysCounters) throws IOException {
    TezConfiguration tezConf = new TezConfiguration(defaultConf);
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    Path testDir = new Path(workDir, UUID.randomUUID().toString());
    String[] localDirs = new String[] { testDir.toString() };
    TezDAGID dagId = TezDAGID.getInstance(appId, 1);
    TezVertexID vertexId = TezVertexID.getInstance(dagId, 1);
    TezTaskID taskId = TezTaskID.getInstance(vertexId, 1);
    TezTaskAttemptID taskAttemptId = TezTaskAttemptID.getInstance(taskId, 1);
    ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create(processorClass).setUserPayload(UserPayload.create(ByteBuffer.wrap(processorConf)));
    TaskSpec taskSpec = new TaskSpec(taskAttemptId, "dagName", "vertexName", -1, processorDescriptor, new ArrayList<InputSpec>(), new ArrayList<OutputSpec>(), null, null);
    TezExecutors sharedExecutor = new TezSharedExecutor(tezConf);
    TezTaskRunner2 taskRunner;
    if (testRunner) {
        taskRunner = new TezTaskRunner2ForTest(tezConf, ugi, localDirs, taskSpec, 1, new HashMap<String, ByteBuffer>(), new HashMap<String, String>(), HashMultimap.<String, String>create(), taskReporter, executor, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), updateSysCounters, sharedExecutor);
    } else {
        taskRunner = new TezTaskRunner2(tezConf, ugi, localDirs, taskSpec, 1, new HashMap<String, ByteBuffer>(), new HashMap<String, String>(), HashMultimap.<String, String>create(), taskReporter, executor, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), updateSysCounters, new DefaultHadoopShim(), sharedExecutor);
    }
    return taskRunner;
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) TezTaskID(org.apache.tez.dag.records.TezTaskID) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) TezDAGID(org.apache.tez.dag.records.TezDAGID) TezExecutors(org.apache.tez.common.TezExecutors) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec)

Example 57 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project tez by apache.

the class TestTezEvent method testSerialization.

@Test
public void testSerialization() throws IOException {
    ArrayList<TezEvent> events = new ArrayList<TezEvent>();
    Configuration conf = new Configuration(true);
    String confVal = RandomStringUtils.random(10000, true, true);
    conf.set("testKey", confVal);
    UserPayload payload = TezUtils.createUserPayloadFromConf(conf);
    TezTaskAttemptID srcTAID = TezTaskAttemptID.getInstance(TezTaskID.fromString("task_1454468251169_866787_1_02_000000"), 1000);
    TezTaskAttemptID destTAID = TezTaskAttemptID.getInstance(TezTaskID.fromString("task_1454468251169_866787_1_02_000000"), 2000);
    EventMetaData srcInfo = new EventMetaData(EventProducerConsumerType.OUTPUT, "v1", "v2", srcTAID);
    EventMetaData destInfo = new EventMetaData(EventProducerConsumerType.OUTPUT, "v3", "v4", destTAID);
    // Case of size less than 4K and parsing skipped during deserialization
    events.add(new TezEvent(new TaskAttemptCompletedEvent(), new EventMetaData(EventProducerConsumerType.PROCESSOR, "v1", "v2", srcTAID)));
    TezEvent dmeEvent = new TezEvent(DataMovementEvent.create(1000, 3, 1, payload.getPayload()), srcInfo, System.currentTimeMillis());
    dmeEvent.setDestinationInfo(destInfo);
    events.add(dmeEvent);
    // Different code path
    events.add(new TezEvent(new TaskStatusUpdateEvent(null, 0.1f, null, false), new EventMetaData(EventProducerConsumerType.PROCESSOR, "v5", "v6", srcTAID)));
    // Serialize to different types of DataOutput
    // One that implements OutputStream and one that does not
    DataOutputBuffer dataout = new DataOutputBuffer();
    ByteArrayDataOutput bout = ByteStreams.newDataOutput();
    serializeEvents(events, dataout);
    serializeEvents(events, bout);
    // Deserialize from different types of DataInput
    // One with DataInputBuffer and another different implementation
    DataInputBuffer datain = new DataInputBuffer();
    datain.reset(dataout.getData(), dataout.getLength());
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(dataout.getData(), 0, dataout.getLength()));
    ArrayList<TezEvent> actual1 = deserializeEvents(datain);
    ArrayList<TezEvent> actual2 = deserializeEvents(dis);
    assertEventEquals(events, actual1);
    assertEventEquals(events, actual2);
    byte[] serializedBytes = bout.toByteArray();
    datain.reset(serializedBytes, serializedBytes.length);
    dis = new DataInputStream(new ByteArrayInputStream(serializedBytes));
    actual1 = deserializeEvents(datain);
    actual2 = deserializeEvents(dis);
    assertEventEquals(events, actual1);
    assertEventEquals(events, actual2);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) UserPayload(org.apache.tez.dag.api.UserPayload) ArrayList(java.util.ArrayList) TaskStatusUpdateEvent(org.apache.tez.runtime.api.events.TaskStatusUpdateEvent) DataInputStream(java.io.DataInputStream) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayDataOutput(com.google.common.io.ByteArrayDataOutput) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) TaskAttemptCompletedEvent(org.apache.tez.runtime.api.events.TaskAttemptCompletedEvent) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 58 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project tez by apache.

the class TestLogicalIOProcessorRuntimeTask method testAutoStart.

@Test(timeout = 5000)
public void testAutoStart() throws Exception {
    TezDAGID dagId = createTezDagId();
    TezVertexID vertexId = createTezVertexId(dagId);
    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
    Multimap<String, String> startedInputsMap = HashMultimap.create();
    TezUmbilical umbilical = mock(TezUmbilical.class);
    TezConfiguration tezConf = new TezConfiguration();
    tezConf.set(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ALLOCATOR_CLASS, ScalingAllocator.class.getName());
    TezTaskAttemptID taId1 = createTaskAttemptID(vertexId, 1);
    TaskSpec task1 = createTaskSpec(taId1, "dag1", "vertex1", 30);
    TezTaskAttemptID taId2 = createTaskAttemptID(vertexId, 2);
    TaskSpec task2 = createTaskSpec(taId2, "dag2", "vertex1", 10);
    TezSharedExecutor sharedExecutor = new TezSharedExecutor(tezConf);
    LogicalIOProcessorRuntimeTask lio1 = new LogicalIOProcessorRuntimeTask(task1, 0, tezConf, null, umbilical, serviceConsumerMetadata, new HashMap<String, String>(), startedInputsMap, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
    try {
        lio1.initialize();
        lio1.run();
        lio1.close();
        // Input should've been started, Output should not have been started
        assertEquals(1, TestProcessor.runCount);
        assertEquals(1, TestInput.startCount);
        assertEquals(0, TestOutput.startCount);
        // test that invocations of progress are counted correctly
        assertEquals(true, lio1.getAndClearProgressNotification());
        // cleared after getting
        assertEquals(false, lio1.getAndClearProgressNotification());
        assertEquals(30, TestInput.vertexParallelism);
        assertEquals(0, TestOutput.vertexParallelism);
        assertEquals(30, lio1.getProcessorContext().getVertexParallelism());
        assertEquals(30, lio1.getInputContexts().iterator().next().getVertexParallelism());
        assertEquals(30, lio1.getOutputContexts().iterator().next().getVertexParallelism());
    } catch (Exception e) {
        fail();
        sharedExecutor.shutdownNow();
    } finally {
        cleanupAndTest(lio1);
    }
    // local mode
    tezConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
    LogicalIOProcessorRuntimeTask lio2 = new LogicalIOProcessorRuntimeTask(task2, 0, tezConf, null, umbilical, serviceConsumerMetadata, new HashMap<String, String>(), startedInputsMap, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
    try {
        lio2.initialize();
        lio2.run();
        lio2.close();
        // Input should not have been started again, Output should not have been started
        assertEquals(2, TestProcessor.runCount);
        assertEquals(1, TestInput.startCount);
        assertEquals(0, TestOutput.startCount);
        assertEquals(30, TestInput.vertexParallelism);
        assertEquals(0, TestOutput.vertexParallelism);
        // Check if parallelism is available in processor/ i/p / o/p contexts
        assertEquals(10, lio2.getProcessorContext().getVertexParallelism());
        assertEquals(10, lio2.getInputContexts().iterator().next().getVertexParallelism());
        assertEquals(10, lio2.getOutputContexts().iterator().next().getVertexParallelism());
    } catch (Exception e) {
        fail();
    } finally {
        cleanupAndTest(lio2);
        sharedExecutor.shutdownNow();
    }
}
Also used : HashMap(java.util.HashMap) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) ScalingAllocator(org.apache.tez.runtime.common.resources.ScalingAllocator) ByteBuffer(java.nio.ByteBuffer) DefaultHadoopShim(org.apache.tez.hadoop.shim.DefaultHadoopShim) TezDAGID(org.apache.tez.dag.records.TezDAGID) TezSharedExecutor(org.apache.tez.common.TezSharedExecutor) TezUmbilical(org.apache.tez.runtime.api.impl.TezUmbilical) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 59 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project tez by apache.

the class TestTaskSpec method testSerDe.

@Test(timeout = 5000)
public void testSerDe() throws IOException {
    ByteBuffer payload = null;
    ProcessorDescriptor procDesc = ProcessorDescriptor.create("proc").setUserPayload(UserPayload.create(payload)).setHistoryText("historyText");
    List<InputSpec> inputSpecs = new ArrayList<>();
    InputSpec inputSpec = new InputSpec("src1", InputDescriptor.create("inputClass"), 10);
    inputSpecs.add(inputSpec);
    List<OutputSpec> outputSpecs = new ArrayList<>();
    OutputSpec outputSpec = new OutputSpec("dest1", OutputDescriptor.create("outputClass"), 999);
    outputSpecs.add(outputSpec);
    List<GroupInputSpec> groupInputSpecs = null;
    Configuration taskConf = new Configuration(false);
    taskConf.set("foo", "bar");
    TezTaskAttemptID taId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(TezVertexID.getInstance(TezDAGID.getInstance("1234", 1, 1), 1), 1), 1);
    TaskSpec taskSpec = new TaskSpec(taId, "dagName", "vName", -1, procDesc, inputSpecs, outputSpecs, groupInputSpecs, taskConf);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    DataOutput out = new DataOutputStream(bos);
    taskSpec.write(out);
    TaskSpec deSerTaskSpec = new TaskSpec();
    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
    DataInput in = new DataInputStream(bis);
    deSerTaskSpec.readFields(in);
    Assert.assertEquals(taskSpec.getDAGName(), deSerTaskSpec.getDAGName());
    Assert.assertEquals(taskSpec.getVertexName(), deSerTaskSpec.getVertexName());
    Assert.assertEquals(taskSpec.getVertexParallelism(), deSerTaskSpec.getVertexParallelism());
    Assert.assertEquals(taskSpec.getInputs().size(), deSerTaskSpec.getInputs().size());
    Assert.assertEquals(taskSpec.getOutputs().size(), deSerTaskSpec.getOutputs().size());
    Assert.assertNull(deSerTaskSpec.getGroupInputs());
    Assert.assertEquals(taskSpec.getInputs().get(0).getSourceVertexName(), deSerTaskSpec.getInputs().get(0).getSourceVertexName());
    Assert.assertEquals(taskSpec.getOutputs().get(0).getDestinationVertexName(), deSerTaskSpec.getOutputs().get(0).getDestinationVertexName());
    Assert.assertEquals(taskConf.get("foo"), deSerTaskSpec.getTaskConf().get("foo"));
}
Also used : DataOutput(java.io.DataOutput) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) ByteBuffer(java.nio.ByteBuffer) DataInput(java.io.DataInput) ByteArrayInputStream(java.io.ByteArrayInputStream) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 60 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project tez by apache.

the class TestMockDAGAppMaster method testMixedEdgeRouting.

@Test(timeout = 100000)
public void testMixedEdgeRouting() throws Exception {
    TezConfiguration tezconf = new TezConfiguration(defaultConf);
    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, null);
    tezClient.start();
    MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
    MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
    mockLauncher.startScheduling(false);
    mockApp.eventsDelegate = new TestEventsDelegate();
    DAG dag = DAG.create("testMixedEdgeRouting");
    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 1);
    Vertex vB = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), 1);
    Vertex vC = Vertex.create("C", ProcessorDescriptor.create("Proc.class"), 1);
    Vertex vD = Vertex.create("D", ProcessorDescriptor.create("Proc.class"), 1);
    Vertex vE = Vertex.create("E", ProcessorDescriptor.create("Proc.class"), 1);
    dag.addVertex(vA).addVertex(vB).addVertex(vC).addVertex(vD).addVertex(vE).addEdge(Edge.create(vA, vC, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In")))).addEdge(Edge.create(vB, vC, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In")))).addEdge(Edge.create(vA, vD, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In")))).addEdge(Edge.create(vB, vD, EdgeProperty.create(EdgeManagerPluginDescriptor.create(LegacyEdgeTestEdgeManager.class.getName()), DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In")))).addEdge(Edge.create(vB, vE, EdgeProperty.create(EdgeManagerPluginDescriptor.create(LegacyEdgeTestEdgeManager.class.getName()), DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
    DAGClient dagClient = tezClient.submitDAG(dag);
    mockLauncher.waitTillContainersLaunched();
    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
    mockLauncher.startScheduling(true);
    dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
    // vC uses on demand routing and its task does not provide events
    VertexImpl vImpl = (VertexImpl) dagImpl.getVertex(vC.getName());
    TaskImpl tImpl = (TaskImpl) vImpl.getTask(0);
    TezTaskAttemptID taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0);
    Assert.assertEquals(0, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size());
    // vD is mixed mode and only 1 out of 2 edges does legacy routing with task providing events
    vImpl = (VertexImpl) dagImpl.getVertex(vD.getName());
    tImpl = (TaskImpl) vImpl.getTask(0);
    taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0);
    Assert.assertEquals(1, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size());
    // vE has single legacy edge and does not use on demand routing and its task provides events
    vImpl = (VertexImpl) dagImpl.getVertex(vE.getName());
    tImpl = (TaskImpl) vImpl.getTask(0);
    taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0);
    Assert.assertEquals(1, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size());
    tezClient.stop();
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) TaskImpl(org.apache.tez.dag.app.dag.impl.TaskImpl) DAG(org.apache.tez.dag.api.DAG) MockContainerLauncher(org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher) DAGImpl(org.apache.tez.dag.app.dag.impl.DAGImpl) DAGClient(org.apache.tez.dag.api.client.DAGClient) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) VertexImpl(org.apache.tez.dag.app.dag.impl.VertexImpl) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Aggregations

TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)167 Test (org.junit.Test)124 TezTaskID (org.apache.tez.dag.records.TezTaskID)61 TezVertexID (org.apache.tez.dag.records.TezVertexID)54 Container (org.apache.hadoop.yarn.api.records.Container)48 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)46 TezDAGID (org.apache.tez.dag.records.TezDAGID)43 Configuration (org.apache.hadoop.conf.Configuration)42 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)41 Priority (org.apache.hadoop.yarn.api.records.Priority)41 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)41 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)33 Resource (org.apache.hadoop.yarn.api.records.Resource)30 TaskCommunicatorManagerInterface (org.apache.tez.dag.app.TaskCommunicatorManagerInterface)28 EventMetaData (org.apache.tez.runtime.api.impl.EventMetaData)28 ClusterInfo (org.apache.tez.dag.app.ClusterInfo)27 ContainerHeartbeatHandler (org.apache.tez.dag.app.ContainerHeartbeatHandler)27 AMContainerMap (org.apache.tez.dag.app.rm.container.AMContainerMap)27 ContainerContextMatcher (org.apache.tez.dag.app.rm.container.ContainerContextMatcher)27 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)25