Search in sources :

Example 1 with DAGPlan

use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.

the class TestVertexImpl method createSamplerDAGPlan2.

// Builds the "TestSamplerDAG" plan with three NORMAL vertices: A, B and C.
// Topology: A -> C and B -> C, each over a SCATTER_GATHER / PERSISTED /
// SEQUENTIAL edge. Every vertex carries one task location hint (hostN/rackN).
private DAGPlan createSamplerDAGPlan2() {
    LOG.info("Setting up sampler 2 dag plan");

    // Vertex A: a single task, feeds C through edge "A_C".
    VertexPlan vertexA = VertexPlan.newBuilder()
            .setName("A")
            .setProcessorDescriptor(TezEntityDescriptorProto.newBuilder().setClassName("A.class"))
            .setType(PlanVertexType.NORMAL)
            .addTaskLocationHint(PlanTaskLocationHint.newBuilder()
                    .addHost("host1")
                    .addRack("rack1")
                    .build())
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(1)
                    .setVirtualCores(4)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("A.class")
                    .build())
            .addOutEdgeId("A_C")
            .build();

    // Vertex B: two tasks, feeds C through edge "B_C".
    VertexPlan vertexB = VertexPlan.newBuilder()
            .setName("B")
            .setProcessorDescriptor(TezEntityDescriptorProto.newBuilder().setClassName("B.class"))
            .setType(PlanVertexType.NORMAL)
            .addTaskLocationHint(PlanTaskLocationHint.newBuilder()
                    .addHost("host2")
                    .addRack("rack2")
                    .build())
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(2)
                    .setVirtualCores(4)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("")
                    .build())
            .addOutEdgeId("B_C")
            .build();

    // Vertex C: two tasks, consumes both "A_C" and "B_C" (in that order).
    VertexPlan vertexC = VertexPlan.newBuilder()
            .setName("C")
            .setType(PlanVertexType.NORMAL)
            .setProcessorDescriptor(TezEntityDescriptorProto.newBuilder().setClassName("C.class"))
            .addTaskLocationHint(PlanTaskLocationHint.newBuilder()
                    .addHost("host3")
                    .addRack("rack3")
                    .build())
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(2)
                    .setVirtualCores(4)
                    .setMemoryMb(1024)
                    .setJavaOpts("foo")
                    .setTaskModule("x3.y3")
                    .build())
            .addInEdgeId("A_C")
            .addInEdgeId("B_C")
            .build();

    // Edge A -> C.
    EdgePlan edgeAToC = EdgePlan.newBuilder()
            .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("A_C"))
            .setInputVertexName("A")
            .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("A_C.class"))
            .setOutputVertexName("C")
            .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER)
            .setId("A_C")
            .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
            .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
            .build();

    // Edge B -> C.
    EdgePlan edgeBToC = EdgePlan.newBuilder()
            .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("B_C.class"))
            .setInputVertexName("B")
            .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("B_C.class"))
            .setOutputVertexName("C")
            .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER)
            .setId("B_C")
            .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
            .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
            .build();

    // Vertices and edges are added in the same order as they are declared above.
    return DAGPlan.newBuilder()
            .setName("TestSamplerDAG")
            .addVertex(vertexA)
            .addVertex(vertexB)
            .addVertex(vertexC)
            .addEdge(edgeAToC)
            .addEdge(edgeBToC)
            .build();
}
Also used : DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan)

Example 2 with DAGPlan

use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.

the class TestVertexImpl method createVertexGroupDAGPlan.

// Builds the "TestGroupDAG" plan with three NORMAL vertices: A, B and C.
// A and B each have an out-edge to C ("A_C", "B_C"), and are additionally
// declared as members of the vertex group "Group", whose merged input
// ("Group.class") is destined for vertex C. i.e. Group(A,B) -> C.
private DAGPlan createVertexGroupDAGPlan() {
    LOG.info("Setting up group dag plan");
    DAGPlan dag = DAGPlan.newBuilder()
            .setName("TestGroupDAG")
            // Vertex A: a single task, feeds C through edge "A_C".
            .addVertex(VertexPlan.newBuilder()
                    .setName("A")
                    .setProcessorDescriptor(TezEntityDescriptorProto.newBuilder().setClassName("A.class"))
                    .setType(PlanVertexType.NORMAL)
                    .setTaskConfig(PlanTaskConfiguration.newBuilder()
                            .setNumTasks(1)
                            .setVirtualCores(4)
                            .setMemoryMb(1024)
                            .setJavaOpts("")
                            .setTaskModule("A.class")
                            .build())
                    .addOutEdgeId("A_C")
                    .build())
            // Vertex B: two tasks, feeds C through edge "B_C".
            .addVertex(VertexPlan.newBuilder()
                    .setName("B")
                    .setProcessorDescriptor(TezEntityDescriptorProto.newBuilder().setClassName("B.class"))
                    .setType(PlanVertexType.NORMAL)
                    .setTaskConfig(PlanTaskConfiguration.newBuilder()
                            .setNumTasks(2)
                            .setVirtualCores(4)
                            .setMemoryMb(1024)
                            .setJavaOpts("")
                            .setTaskModule("")
                            .build())
                    .addOutEdgeId("B_C")
                    .build())
            // Vertex C: two tasks, consumes both edges.
            .addVertex(VertexPlan.newBuilder()
                    .setName("C")
                    .setType(PlanVertexType.NORMAL)
                    .setProcessorDescriptor(TezEntityDescriptorProto.newBuilder().setClassName("C.class"))
                    .setTaskConfig(PlanTaskConfiguration.newBuilder()
                            .setNumTasks(2)
                            .setVirtualCores(4)
                            .setMemoryMb(1024)
                            .setJavaOpts("foo")
                            .setTaskModule("x3.y3")
                            .build())
                    .addInEdgeId("A_C")
                    .addInEdgeId("B_C")
                    .build())
            // Edge A -> C.
            .addEdge(EdgePlan.newBuilder()
                    .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("A_C"))
                    .setInputVertexName("A")
                    .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("A_C.class"))
                    .setOutputVertexName("C")
                    .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER)
                    .setId("A_C")
                    .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
                    .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
                    .build())
            // Edge B -> C.
            .addEdge(EdgePlan.newBuilder()
                    .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("B_C.class"))
                    .setInputVertexName("B")
                    .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("B_C.class"))
                    .setOutputVertexName("C")
                    .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER)
                    .setId("B_C")
                    .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
                    .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
                    .build())
            // Vertex group {A, B} with a merged input targeting C. The group is
            // handed over as a builder (no explicit build()), as in the original.
            .addVertexGroups(PlanVertexGroupInfo.newBuilder()
                    .setGroupName("Group")
                    .addGroupMembers("A")
                    .addGroupMembers("B")
                    .addEdgeMergedInputs(PlanGroupInputEdgeInfo.newBuilder()
                            .setDestVertexName("C")
                            .setMergedInput(TezEntityDescriptorProto.newBuilder()
                                    .setClassName("Group.class")
                                    .build())
                            .build()))
            .build();
    return dag;
}
Also used : DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan)

Example 3 with DAGPlan

use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.

the class TestVertexImpl method createDAGPlanWithRunningInitializer3.

private DAGPlan createDAGPlanWithRunningInitializer3() {
    // Topology built below ("DagWithInputInitializer3"):
    //
    //   vertex2     vertex1
    //        \       /
    //      e2 \     / e1
    //          vertex3 <---- root input "input1"
    //
    // Both edges are BROADCAST / PERSISTED / SEQUENTIAL. Per the original
    // author's note, vertex1 sends events to vertex3 and vertex3 receives them.
    LOG.info("Setting up dag plan with running input initializer3");

    // vertex1 and vertex2 share the same single-task configuration values.
    VertexPlan firstVertex = VertexPlan.newBuilder()
            .setName("vertex1")
            .setType(PlanVertexType.NORMAL)
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(1)
                    .setVirtualCores(1)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("x1.y1")
                    .build())
            .addOutEdgeId("e1")
            .build();

    VertexPlan secondVertex = VertexPlan.newBuilder()
            .setName("vertex2")
            .setType(PlanVertexType.NORMAL)
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(1)
                    .setVirtualCores(1)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("x1.y1")
                    .build())
            .addOutEdgeId("e2")
            .build();

    // Root input attached to vertex3; its controller (initializer) class name
    // is the placeholder string below.
    RootInputLeafOutputProto rootInput = RootInputLeafOutputProto.newBuilder()
            .setControllerDescriptor(
                    TezEntityDescriptorProto.newBuilder().setClassName("IrrelevantInitializerClassName"))
            .setName("input1")
            .setIODescriptor(TezEntityDescriptorProto.newBuilder().setClassName("InputClazz").build())
            .build();

    // vertex3: 20 tasks, consumes e1 and e2 (in that order) plus the root input.
    VertexPlan thirdVertex = VertexPlan.newBuilder()
            .setName("vertex3")
            .setType(PlanVertexType.NORMAL)
            .addInputs(rootInput)
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(20)
                    .setVirtualCores(4)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("x2.y2")
                    .build())
            .addInEdgeId("e1")
            .addInEdgeId("e2")
            .build();

    // e1: vertex1 -> vertex3.
    EdgePlan edgeOne = EdgePlan.newBuilder()
            .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("v1_v3"))
            .setInputVertexName("vertex1")
            .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o2"))
            .setOutputVertexName("vertex3")
            .setDataMovementType(PlanEdgeDataMovementType.BROADCAST)
            .setId("e1")
            .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
            .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
            .build();

    // e2: vertex2 -> vertex3.
    EdgePlan edgeTwo = EdgePlan.newBuilder()
            .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("v2_v3"))
            .setInputVertexName("vertex2")
            .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o2"))
            .setOutputVertexName("vertex3")
            .setDataMovementType(PlanEdgeDataMovementType.BROADCAST)
            .setId("e2")
            .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
            .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
            .build();

    return DAGPlan.newBuilder()
            .setName("DagWithInputInitializer3")
            .addVertex(firstVertex)
            .addVertex(secondVertex)
            .addVertex(thirdVertex)
            .addEdge(edgeOne)
            .addEdge(edgeTwo)
            .build();
}
Also used : DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan)

Example 4 with DAGPlan

use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.

the class TestVertexImpl method createDAGPlanWithVMException.

/**
 * Builds a two-vertex plan, {@code vertex1 -> vertex2}, in which both vertices
 * use {@code VertexManagerWithException} as their vertex manager plugin.
 *
 * @param initializerClassName class name set on the controller descriptor of
 *        vertex1's root input ("input1")
 * @param exLocation its {@code name()} is encoded as bytes and passed to the
 *        vertex manager plugin as its user payload
 * @return the assembled plan, named "initializerWith0Tasks"
 */
private DAGPlan createDAGPlanWithVMException(String initializerClassName, VMExceptionLocation exLocation) {
    LOG.info("Setting up dag plan with VertexManager which would throw exception");

    // Both vertices carry an identical plugin descriptor, so build it once.
    TezEntityDescriptorProto vertexManagerPlugin = TezEntityDescriptorProto.newBuilder()
            .setClassName(VertexManagerWithException.class.getName())
            .setTezUserPayload(DAGProtos.TezUserPayloadProto.newBuilder()
                    .setUserPayload(ByteString.copyFrom(exLocation.name().getBytes())))
            .build();

    // Root input for vertex1, driven by the caller-supplied initializer class.
    RootInputLeafOutputProto rootInput = RootInputLeafOutputProto.newBuilder()
            .setControllerDescriptor(TezEntityDescriptorProto.newBuilder().setClassName(initializerClassName))
            .setName("input1")
            .setIODescriptor(TezEntityDescriptorProto.newBuilder().setClassName("InputClazz").build())
            .build();

    VertexPlan upstream = VertexPlan.newBuilder()
            .setName("vertex1")
            .setType(PlanVertexType.NORMAL)
            .addInputs(rootInput)
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(1)
                    .setVirtualCores(4)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("x1.y1")
                    .build())
            .addOutEdgeId("e1")
            .setVertexManagerPlugin(vertexManagerPlugin)
            .build();

    VertexPlan downstream = VertexPlan.newBuilder()
            .setName("vertex2")
            .setType(PlanVertexType.NORMAL)
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(1)
                    .setVirtualCores(4)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("x2.y2")
                    .build())
            .addInEdgeId("e1")
            .setVertexManagerPlugin(vertexManagerPlugin)
            .build();

    // e1: vertex1 -> vertex2, BROADCAST / PERSISTED / SEQUENTIAL.
    EdgePlan link = EdgePlan.newBuilder()
            .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("v1_v2"))
            .setInputVertexName("vertex1")
            .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o2"))
            .setOutputVertexName("vertex2")
            .setDataMovementType(PlanEdgeDataMovementType.BROADCAST)
            .setId("e1")
            .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
            .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
            .build();

    return DAGPlan.newBuilder()
            .setName("initializerWith0Tasks")
            .addVertex(upstream)
            .addVertex(downstream)
            .addEdge(link)
            .build();
}
Also used : DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan)

Example 5 with DAGPlan

use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.

the class TestVertexImpl method createDAGPlanWithRunningInitializer4.

private DAGPlan createDAGPlanWithRunningInitializer4() {
    // Topology built below ("DagWithInputInitializer4"), a linear chain:
    //
    //   vertex1
    //      | e1
    //   vertex2
    //      | e2
    //   vertex3 <---- root input "input1"
    //
    // Both edges are BROADCAST / PERSISTED / SEQUENTIAL. Per the original
    // author's note, vertex1 sends events to vertex3 even though the two are
    // not directly connected; vertex2 may optionally send events as well,
    // which is set up via the initializer.
    LOG.info("Setting up dag plan with running input initializer4");

    // Head of the chain: one task, only an out-edge.
    VertexPlan head = VertexPlan.newBuilder()
            .setName("vertex1")
            .setType(PlanVertexType.NORMAL)
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(1)
                    .setVirtualCores(1)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("x1.y1")
                    .build())
            .addOutEdgeId("e1")
            .build();

    // Middle of the chain: two tasks, consumes e1 and produces e2.
    VertexPlan middle = VertexPlan.newBuilder()
            .setName("vertex2")
            .setType(PlanVertexType.NORMAL)
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(2)
                    .setVirtualCores(1)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("x1.y1")
                    .build())
            .addInEdgeId("e1")
            .addOutEdgeId("e2")
            .build();

    // Root input attached to vertex3; its controller (initializer) class name
    // is the placeholder string below.
    RootInputLeafOutputProto rootInput = RootInputLeafOutputProto.newBuilder()
            .setControllerDescriptor(
                    TezEntityDescriptorProto.newBuilder().setClassName("IrrelevantInitializerClassName"))
            .setName("input1")
            .setIODescriptor(TezEntityDescriptorProto.newBuilder().setClassName("InputClazz").build())
            .build();

    // Tail of the chain: 20 tasks, consumes e2 plus the root input.
    VertexPlan tail = VertexPlan.newBuilder()
            .setName("vertex3")
            .setType(PlanVertexType.NORMAL)
            .addInputs(rootInput)
            .setTaskConfig(PlanTaskConfiguration.newBuilder()
                    .setNumTasks(20)
                    .setVirtualCores(4)
                    .setMemoryMb(1024)
                    .setJavaOpts("")
                    .setTaskModule("x2.y2")
                    .build())
            .addInEdgeId("e2")
            .build();

    // e1: vertex1 -> vertex2.
    EdgePlan headToMiddle = EdgePlan.newBuilder()
            .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("v1_v3"))
            .setInputVertexName("vertex1")
            .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o2"))
            .setOutputVertexName("vertex2")
            .setDataMovementType(PlanEdgeDataMovementType.BROADCAST)
            .setId("e1")
            .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
            .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
            .build();

    // e2: vertex2 -> vertex3.
    EdgePlan middleToTail = EdgePlan.newBuilder()
            .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("v2_v3"))
            .setInputVertexName("vertex2")
            .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o2"))
            .setOutputVertexName("vertex3")
            .setDataMovementType(PlanEdgeDataMovementType.BROADCAST)
            .setId("e2")
            .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
            .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
            .build();

    return DAGPlan.newBuilder()
            .setName("DagWithInputInitializer4")
            .addVertex(head)
            .addVertex(middle)
            .addVertex(tail)
            .addEdge(headToMiddle)
            .addEdge(middleToTail)
            .build();
}
Also used : DAGPlan(org.apache.tez.dag.api.records.DAGProtos.DAGPlan)

Aggregations

- DAGPlan (org.apache.tez.dag.api.records.DAGProtos.DAGPlan): 61
- TezDAGID (org.apache.tez.dag.records.TezDAGID): 20
- Path (org.apache.hadoop.fs.Path): 19
- DAGHistoryEvent (org.apache.tez.dag.history.DAGHistoryEvent): 18
- DAGSubmittedEvent (org.apache.tez.dag.history.events.DAGSubmittedEvent): 18
- ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 17
- Configuration (org.apache.hadoop.conf.Configuration): 16
- Test (org.junit.Test): 16
- SystemClock (org.apache.hadoop.yarn.util.SystemClock): 15
- LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 14
- DAGRecoveryData (org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData): 12
- RecoveryService (org.apache.tez.dag.history.recovery.RecoveryService): 11
- HashMap (java.util.HashMap): 10
- TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 7
- VertexPlan (org.apache.tez.dag.api.records.DAGProtos.VertexPlan): 7
- ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 6
- TezVertexID (org.apache.tez.dag.records.TezVertexID): 6
- DefaultHadoopShim (org.apache.tez.hadoop.shim.DefaultHadoopShim): 5
- Credentials (org.apache.hadoop.security.Credentials): 4
- DAGInitializedEvent (org.apache.tez.dag.history.events.DAGInitializedEvent): 4