Search in sources:

Example 1 with GroupInputEdge

use of org.apache.tez.dag.api.GroupInputEdge in project tez by apache.

the class MultipleCommitsExample method createDAG.

/**
 * Assembles the "multipleCommitsDAG" DAG.
 *
 * <p>Vertices v1, v2 and v3 are each created with a parallelism of 2 and run
 * {@code MultipleOutputProcessor}. v1 and v2 are additionally combined into the vertex
 * group "uv12", which is connected to v3 through a {@link GroupInputEdge}. Each vertex
 * (and the group) receives the requested number of {@code MROutput} text data sinks,
 * named {@code <namePrefix>_<index>} and written to {@code <pathPrefix>_<index>}.
 *
 * @param tezConf               base configuration; copied for every sink and used for the edge config
 * @param v1OutputPathPrefix    output path prefix for the v1 sinks
 * @param v1OutputNum           number of sinks added to v1
 * @param v2OutputPathPrefix    output path prefix for the v2 sinks
 * @param v2OutputNum           number of sinks added to v2
 * @param uv12OutputPathPrefix  output path prefix for the sinks shared by the (v1, v2) group
 * @param uv12OutputNum         number of sinks added to the (v1, v2) group
 * @param v3OutputPathPrefix    output path prefix for the v3 sinks
 * @param v3OutputNum           number of sinks added to v3
 * @param commitOnVertexSuccess its negation is stored under
 *                              {@code TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS}
 * @return the assembled DAG
 * @throws IOException declared by the original signature
 */
private DAG createDAG(TezConfiguration tezConf, String v1OutputPathPrefix, int v1OutputNum,
        String v2OutputPathPrefix, int v2OutputNum, String uv12OutputPathPrefix, int uv12OutputNum,
        String v3OutputPathPrefix, int v3OutputNum, boolean commitOnVertexSuccess) throws IOException {
    DAG multiCommitDag = DAG.create("multipleCommitsDAG");
    multiCommitDag.setConf(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
            String.valueOf(!commitOnVertexSuccess));

    String processorName = MultipleOutputProcessor.class.getName();

    // v1 and its private sinks
    ProcessorDescriptor v1Processor = ProcessorDescriptor.create(processorName)
            .setUserPayload(new MultipleOutputProcessor.MultipleOutputProcessorConfig(
                    V1OutputNamePrefix, v1OutputNum, UV12OutputNamePrefix, uv12OutputNum).toUserPayload());
    Vertex v1 = Vertex.create("v1", v1Processor, 2);
    for (int idx = 0; idx < v1OutputNum; idx++) {
        String suffix = "_" + idx;
        DataSinkDescriptor v1Sink = MROutput
                .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, v1OutputPathPrefix + suffix)
                .build();
        v1.addDataSink(V1OutputNamePrefix + suffix, v1Sink);
    }

    // v2 and its private sinks
    ProcessorDescriptor v2Processor = ProcessorDescriptor.create(processorName)
            .setUserPayload(new MultipleOutputProcessor.MultipleOutputProcessorConfig(
                    V2OutputNamePrefix, v2OutputNum, UV12OutputNamePrefix, uv12OutputNum).toUserPayload());
    Vertex v2 = Vertex.create("v2", v2Processor, 2);
    for (int idx = 0; idx < v2OutputNum; idx++) {
        String suffix = "_" + idx;
        DataSinkDescriptor v2Sink = MROutput
                .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, v2OutputPathPrefix + suffix)
                .build();
        v2.addDataSink(V2OutputNamePrefix + suffix, v2Sink);
    }

    // sinks shared by the (v1, v2) vertex group
    VertexGroup uv12 = multiCommitDag.createVertexGroup("uv12", v1, v2);
    for (int idx = 0; idx < uv12OutputNum; idx++) {
        String suffix = "_" + idx;
        DataSinkDescriptor groupSink = MROutput
                .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, uv12OutputPathPrefix + suffix)
                .build();
        uv12.addDataSink(UV12OutputNamePrefix + suffix, groupSink);
    }

    // v3 and its private sinks
    ProcessorDescriptor v3Processor = ProcessorDescriptor.create(processorName)
            .setUserPayload(new MultipleOutputProcessor.MultipleOutputProcessorConfig(
                    V3OutputNamePrefix, v3OutputNum).toUserPayload());
    Vertex v3 = Vertex.create("v3", v3Processor, 2);
    for (int idx = 0; idx < v3OutputNum; idx++) {
        String suffix = "_" + idx;
        DataSinkDescriptor v3Sink = MROutput
                .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, v3OutputPathPrefix + suffix)
                .build();
        v3.addDataSink(V3OutputNamePrefix + suffix, v3Sink);
    }

    // group (v1, v2) -> v3
    OrderedPartitionedKVEdgeConfig kvEdgeConfig = OrderedPartitionedKVEdgeConfig
            .newBuilder(NullWritable.class.getName(), Text.class.getName(), HashPartitioner.class.getName())
            .setFromConfiguration(tezConf)
            .build();
    GroupInputEdge groupToV3 = GroupInputEdge.create(uv12, v3, kvEdgeConfig.createDefaultEdgeProperty(),
            InputDescriptor.create(ConcatenatedMergedKeyValuesInput.class.getName()));

    multiCommitDag.addVertex(v1);
    multiCommitDag.addVertex(v2);
    multiCommitDag.addVertex(v3);
    multiCommitDag.addEdge(groupToV3);
    return multiCommitDag;
}
Also used : OrderedPartitionedKVEdgeConfig(org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig) Vertex(org.apache.tez.dag.api.Vertex) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Text(org.apache.hadoop.io.Text) DAG(org.apache.tez.dag.api.DAG) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) NullWritable(org.apache.hadoop.io.NullWritable) VertexGroup(org.apache.tez.dag.api.VertexGroup) TextOutputFormat(org.apache.hadoop.mapred.TextOutputFormat) HashPartitioner(org.apache.tez.runtime.library.partitioner.HashPartitioner) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge)

Example 2 with GroupInputEdge

use of org.apache.tez.dag.api.GroupInputEdge in project tez by apache.

the class TestCommit method createDAGPlan.

/**
 * Builds a DAG plan with the topology
 * <pre>
 *   v1->v3
 *   v2->v3
 *   vertex_group (v1, v2)
 * </pre>
 * The vertex group owns the data sink "v12Out" and vertex3 owns "v3Out". Each sink uses a
 * {@code CountingOutputCommitter} whose config is built from the negation of the matching
 * "succeeded" flag and {@code true}.
 *
 * @param vertexGroupCommitSucceeded negated and passed to the committer config of "v12Out"
 * @param v3CommitSucceeded          negated and passed to the committer config of "v3Out"
 * @return the plan produced by {@code DAG.createDag}
 * @throws Exception declared by the original signature
 */
private DAGPlan createDAGPlan(boolean vertexGroupCommitSucceeded, boolean v3CommitSucceeded) throws Exception {
    LOG.info("Setting up group dag plan");

    final int taskCount = 1;
    final Resource taskResource = Resource.newInstance(1, 1);
    org.apache.tez.dag.api.Vertex vertex1 = org.apache.tez.dag.api.Vertex.create(
            "vertex1", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    org.apache.tez.dag.api.Vertex vertex2 = org.apache.tez.dag.api.Vertex.create(
            "vertex2", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    org.apache.tez.dag.api.Vertex vertex3 = org.apache.tez.dag.api.Vertex.create(
            "vertex3", ProcessorDescriptor.create("Processor"), taskCount, taskResource);

    DAG testDag = DAG.create("testDag");
    String groupName = "uv12";

    // Committer for the group output.
    UserPayload groupCommitterPayload = UserPayload.create(ByteBuffer.wrap(
            new CountingOutputCommitter.CountingOutputCommitterConfig(!vertexGroupCommitSucceeded, true)
                    .toUserPayload()));
    OutputCommitterDescriptor groupCommitter = OutputCommitterDescriptor
            .create(CountingOutputCommitter.class.getName())
            .setUserPayload(groupCommitterPayload);

    // Committer for the vertex3 output.
    UserPayload v3CommitterPayload = UserPayload.create(ByteBuffer.wrap(
            new CountingOutputCommitter.CountingOutputCommitterConfig(!v3CommitSucceeded, true)
                    .toUserPayload()));
    OutputCommitterDescriptor v3Committer = OutputCommitterDescriptor
            .create(CountingOutputCommitter.class.getName())
            .setUserPayload(v3CommitterPayload);

    org.apache.tez.dag.api.VertexGroup group = testDag.createVertexGroup(groupName, vertex1, vertex2);

    // The same output descriptor instance backs both sinks.
    OutputDescriptor sharedOutput = OutputDescriptor.create("output.class");
    group.addDataSink("v12Out", DataSinkDescriptor.create(sharedOutput, groupCommitter, null));
    vertex3.addDataSink("v3Out", DataSinkDescriptor.create(sharedOutput, v3Committer, null));

    EdgeProperty scatterGather = EdgeProperty.create(
            DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            OutputDescriptor.create("dummy output class"),
            InputDescriptor.create("dummy input class"));
    GroupInputEdge groupToV3 = GroupInputEdge.create(
            group, vertex3, scatterGather, InputDescriptor.create("merge.class"));

    testDag.addVertex(vertex1).addVertex(vertex2).addVertex(vertex3).addEdge(groupToV3);
    return testDag.createDag(conf, null, null, null, true);
}
Also used : OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Resource(org.apache.hadoop.yarn.api.records.Resource) DAG(org.apache.tez.dag.api.DAG) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge)

Example 3 with GroupInputEdge

use of org.apache.tez.dag.api.GroupInputEdge in project tez by apache.

the class TestCommit method createDAGPlanWith2VertexGroupOutputs.

/**
 * Builds a DAG plan with the topology
 * <pre>
 *   v1->v3
 *   v2->v3
 *   vertex_group (v1, v2) has 2 shared outputs
 * </pre>
 * The vertex group owns the data sinks "v12Out1" and "v12Out2"; vertex3 owns "v3Out". Each
 * sink uses a {@code CountingOutputCommitter} whose config is built from the negation of the
 * matching "succeeded" flag and {@code true}.
 *
 * @param vertexGroupCommitSucceeded1 negated and passed to the committer config of "v12Out1"
 * @param vertexGroupCommitSucceeded2 negated and passed to the committer config of "v12Out2"
 * @param v3CommitSucceeded           negated and passed to the committer config of "v3Out"
 * @return the plan produced by {@code DAG.createDag}
 * @throws Exception declared by the original signature
 */
private DAGPlan createDAGPlanWith2VertexGroupOutputs(boolean vertexGroupCommitSucceeded1, boolean vertexGroupCommitSucceeded2, boolean v3CommitSucceeded) throws Exception {
    LOG.info("Setting up group dag plan");

    final int taskCount = 1;
    final Resource taskResource = Resource.newInstance(1, 1);
    org.apache.tez.dag.api.Vertex vertex1 = org.apache.tez.dag.api.Vertex.create(
            "vertex1", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    org.apache.tez.dag.api.Vertex vertex2 = org.apache.tez.dag.api.Vertex.create(
            "vertex2", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    org.apache.tez.dag.api.Vertex vertex3 = org.apache.tez.dag.api.Vertex.create(
            "vertex3", ProcessorDescriptor.create("Processor"), taskCount, taskResource);

    DAG testDag = DAG.create("testDag");
    String groupName = "uv12";
    String committerClass = CountingOutputCommitter.class.getName();

    // Committer for the first group output.
    UserPayload firstGroupPayload = UserPayload.create(ByteBuffer.wrap(
            new CountingOutputCommitter.CountingOutputCommitterConfig(!vertexGroupCommitSucceeded1, true)
                    .toUserPayload()));
    OutputCommitterDescriptor firstGroupCommitter =
            OutputCommitterDescriptor.create(committerClass).setUserPayload(firstGroupPayload);

    // Committer for the second group output.
    UserPayload secondGroupPayload = UserPayload.create(ByteBuffer.wrap(
            new CountingOutputCommitter.CountingOutputCommitterConfig(!vertexGroupCommitSucceeded2, true)
                    .toUserPayload()));
    OutputCommitterDescriptor secondGroupCommitter =
            OutputCommitterDescriptor.create(committerClass).setUserPayload(secondGroupPayload);

    // Committer for the vertex3 output.
    UserPayload v3Payload = UserPayload.create(ByteBuffer.wrap(
            new CountingOutputCommitter.CountingOutputCommitterConfig(!v3CommitSucceeded, true)
                    .toUserPayload()));
    OutputCommitterDescriptor v3Committer =
            OutputCommitterDescriptor.create(committerClass).setUserPayload(v3Payload);

    org.apache.tez.dag.api.VertexGroup group = testDag.createVertexGroup(groupName, vertex1, vertex2);

    // The same output descriptor instance backs all three sinks.
    OutputDescriptor sharedOutput = OutputDescriptor.create("output.class");
    group.addDataSink("v12Out1", DataSinkDescriptor.create(sharedOutput, firstGroupCommitter, null));
    group.addDataSink("v12Out2", DataSinkDescriptor.create(sharedOutput, secondGroupCommitter, null));
    vertex3.addDataSink("v3Out", DataSinkDescriptor.create(sharedOutput, v3Committer, null));

    EdgeProperty scatterGather = EdgeProperty.create(
            DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            OutputDescriptor.create("dummy output class"),
            InputDescriptor.create("dummy input class"));
    GroupInputEdge groupToV3 = GroupInputEdge.create(
            group, vertex3, scatterGather, InputDescriptor.create("merge.class"));

    testDag.addVertex(vertex1).addVertex(vertex2).addVertex(vertex3).addEdge(groupToV3);
    return testDag.createDag(conf, null, null, null, true);
}
Also used : OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Resource(org.apache.hadoop.yarn.api.records.Resource) DAG(org.apache.tez.dag.api.DAG) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge)

Example 4 with GroupInputEdge

use of org.apache.tez.dag.api.GroupInputEdge in project tez by apache.

the class TestMockDAGAppMaster method createDAG.

/**
 * Creates a DAG in which the vertex group "uv12" (v1, v2) feeds v3 through a
 * {@link GroupInputEdge}. The group owns the data sink "uv12Out" and v3 owns "v3Out";
 * the committer descriptor of each sink comes from {@code createOutputCommitterDesc}.
 *
 * @param dagName        name given to the DAG
 * @param uv12CommitFail forwarded to {@code createOutputCommitterDesc} for the group sink
 * @param v3CommitFail   forwarded to {@code createOutputCommitterDesc} for the v3 sink
 * @return the assembled DAG
 */
private DAG createDAG(String dagName, boolean uv12CommitFail, boolean v3CommitFail) {
    DAG result = DAG.create(dagName);

    Vertex first = Vertex.create("v1", ProcessorDescriptor.create("Proc"), 1);
    Vertex second = Vertex.create("v2", ProcessorDescriptor.create("Proc"), 1);
    Vertex third = Vertex.create("v3", ProcessorDescriptor.create("Proc"), 1);

    // Sink shared by the (v1, v2) group.
    VertexGroup group = result.createVertexGroup("uv12", first, second);
    group.addDataSink("uv12Out", DataSinkDescriptor.create(
            OutputDescriptor.create("dummy output"), createOutputCommitterDesc(uv12CommitFail), null));

    // Sink owned by v3 alone.
    third.addDataSink("v3Out", DataSinkDescriptor.create(
            OutputDescriptor.create("dummy output"), createOutputCommitterDesc(v3CommitFail), null));

    EdgeProperty scatterGather = EdgeProperty.create(
            DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            OutputDescriptor.create("dummy output class"),
            InputDescriptor.create("dummy input class"));
    GroupInputEdge groupToThird = GroupInputEdge.create(
            group, third, scatterGather, InputDescriptor.create("merge.class"));

    result.addVertex(first);
    result.addVertex(second);
    result.addVertex(third);
    result.addEdge(groupToThird);
    return result;
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) VertexGroup(org.apache.tez.dag.api.VertexGroup) DAG(org.apache.tez.dag.api.DAG) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor)

Example 5 with GroupInputEdge

use of org.apache.tez.dag.api.GroupInputEdge in project tez by apache.

the class TestDAGUtils method createDAG.

/**
 * Creates a plan with 3 vertices: A, B, C. Group(A,B)->C.
 *
 * <p>Every processor, input and output descriptor carries a history text. vertex1 also has
 * the data source "input1", and the DAG is given a caller context and the DAG info string
 * "dagInfo". Both the vertex group and vertex3 register a data sink named "uvOut" built from
 * the same output and committer descriptors.
 *
 * @return the plan produced by {@code DAG.createDag} using a {@code Configuration(false)}
 */
@SuppressWarnings("deprecation")
private DAGPlan createDAG() {
    Configuration planConf = new Configuration(false);
    final int taskCount = 1;
    final Resource taskResource = Resource.newInstance(1, 1);

    ProcessorDescriptor processor1 =
            ProcessorDescriptor.create("Processor").setHistoryText("vertex1 Processor HistoryText");
    org.apache.tez.dag.api.Vertex vertex1 = Vertex.create("vertex1", processor1, taskCount, taskResource);
    InputDescriptor rootInput = InputDescriptor.create("input.class").setHistoryText("input HistoryText");
    vertex1.addDataSource("input1", DataSourceDescriptor.create(rootInput, null, null));

    ProcessorDescriptor processor2 =
            ProcessorDescriptor.create("Processor").setHistoryText("vertex2 Processor HistoryText");
    org.apache.tez.dag.api.Vertex vertex2 = Vertex.create("vertex2", processor2, taskCount, taskResource);

    ProcessorDescriptor processor3 =
            ProcessorDescriptor.create("Processor").setHistoryText("vertex3 Processor HistoryText");
    org.apache.tez.dag.api.Vertex vertex3 = Vertex.create("vertex3", processor3, taskCount, taskResource);

    DAG testDag = DAG.create("testDag");
    testDag.setCallerContext(CallerContext.create("context1", "callerId1", "callerType1", "desc1"));
    testDag.setDAGInfo("dagInfo");

    String groupName = "uv12";
    org.apache.tez.dag.api.VertexGroup group = testDag.createVertexGroup(groupName, vertex1, vertex2);

    // One output descriptor and one committer descriptor are reused for both sinks.
    OutputDescriptor sharedOutput = OutputDescriptor.create("output.class").setHistoryText("uvOut HistoryText");
    OutputCommitterDescriptor committer = OutputCommitterDescriptor.create(OutputCommitter.class.getName());
    group.addDataSink("uvOut", DataSinkDescriptor.create(sharedOutput, committer, null));
    vertex3.addDataSink("uvOut", DataSinkDescriptor.create(sharedOutput, committer, null));

    EdgeProperty scatterGather = EdgeProperty.create(
            DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            OutputDescriptor.create("dummy output class").setHistoryText("Dummy History Text"),
            InputDescriptor.create("dummy input class").setHistoryText("Dummy History Text"));
    GroupInputEdge groupToV3 = GroupInputEdge.create(
            group, vertex3, scatterGather,
            InputDescriptor.create("merge.class").setHistoryText("Merge HistoryText"));

    testDag.addVertex(vertex1).addVertex(vertex2).addVertex(vertex3).addEdge(groupToV3);
    return testDag.createDag(planConf, null, null, null, true);
}
Also used : OutputCommitter(org.apache.tez.runtime.api.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Resource(org.apache.hadoop.yarn.api.records.Resource) DAG(org.apache.tez.dag.api.DAG) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge) Vertex(org.apache.tez.dag.api.Vertex)

Aggregations

DAG (org.apache.tez.dag.api.DAG)8 GroupInputEdge (org.apache.tez.dag.api.GroupInputEdge)8 Vertex (org.apache.tez.dag.api.Vertex)5 Resource (org.apache.hadoop.yarn.api.records.Resource)4 OutputCommitterDescriptor (org.apache.tez.dag.api.OutputCommitterDescriptor)4 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)4 VertexGroup (org.apache.tez.dag.api.VertexGroup)4 LinkedList (java.util.LinkedList)2 Configuration (org.apache.hadoop.conf.Configuration)2 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)2 TezEdgeProperty (org.apache.hadoop.hive.ql.plan.TezEdgeProperty)2 EdgeType (org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType)2 UnionWork (org.apache.hadoop.hive.ql.plan.UnionWork)2 JobConf (org.apache.hadoop.mapred.JobConf)2 DataSinkDescriptor (org.apache.tez.dag.api.DataSinkDescriptor)2 Edge (org.apache.tez.dag.api.Edge)2 JSONObject (org.json.JSONObject)2 ByteString (com.google.protobuf.ByteString)1 LinkedHashMap (java.util.LinkedHashMap)1 FileSystem (org.apache.hadoop.fs.FileSystem)1