Search in sources :

Example 1 with OutputCommitterDescriptor

Use of org.apache.tez.dag.api.OutputCommitterDescriptor in the Apache Tez project.

Class VertexImpl, method setAdditionalOutputs.

/**
 * Registers the additional outputs declared for this vertex.
 *
 * <p>Both {@code additionalOutputs} and {@code outputCommitters} are replaced with fresh maps
 * pre-sized to the number of outputs. For every entry the output name is registered through
 * {@code addIO}, a {@code RootInputLeafOutput} is stored in {@code additionalOutputs} under that
 * name, and an {@code OutputSpec} is appended to {@code additionalOutputSpecs}. This method only
 * resets {@code outputCommitters}; it does not put anything into it.
 *
 * @param outputs the output definitions to register; must not be {@code null}
 */
@Override
public void setAdditionalOutputs(List<RootInputLeafOutputProto> outputs) {
    final int outputCount = outputs.size();
    LOG.info("Setting " + outputCount + " additional outputs for vertex " + this.logIdentifier);
    this.additionalOutputs = Maps.newHashMapWithExpectedSize(outputCount);
    this.outputCommitters = Maps.newHashMapWithExpectedSize(outputCount);
    for (RootInputLeafOutputProto outputProto : outputs) {
        final String outputName = outputProto.getName();
        addIO(outputName);
        final OutputDescriptor outputDescriptor = DagTypeConverters.convertOutputDescriptorFromDAGPlan(outputProto.getIODescriptor());
        // The committer is optional: an output without a controller descriptor is stored with a null committer.
        OutputCommitterDescriptor committerDescriptor = null;
        if (outputProto.hasControllerDescriptor()) {
            committerDescriptor = DagTypeConverters.convertOutputCommitterDescriptorFromDAGPlan(outputProto.getControllerDescriptor());
        }
        this.additionalOutputs.put(outputName, new RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor>(outputName, outputDescriptor, committerDescriptor));
        additionalOutputSpecs.add(new OutputSpec(outputName, outputDescriptor, 0));
    }
}
Also used : OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) RootInputLeafOutputProto(org.apache.tez.dag.api.records.DAGProtos.RootInputLeafOutputProto) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec)

Example 2 with OutputCommitterDescriptor

Use of org.apache.tez.dag.api.OutputCommitterDescriptor in the Apache Tez project.

Class TestCommit, method createDAGPlan.

/**
 * Builds the DAG plan used by the commit tests. Topology:
 * <pre>
 *   v1 -> v3
 *   v2 -> v3
 *   vertex_group (v1, v2)
 * </pre>
 * The vertex group gets one data sink ("v12Out") and vertex3 gets its own ("v3Out"). Each sink is
 * paired with a {@code CountingOutputCommitter} descriptor whose config receives the negated
 * success flag as its first argument and {@code true} as its second.
 *
 * @param vertexGroupCommitSucceeded flag whose negation configures the committer of the group sink
 * @param v3CommitSucceeded flag whose negation configures the committer of the vertex3 sink
 * @return the plan produced by {@code DAG#createDag}
 * @throws Exception if building the committer payloads or the plan fails
 */
private DAGPlan createDAGPlan(boolean vertexGroupCommitSucceeded, boolean v3CommitSucceeded) throws Exception {
    LOG.info("Setting up group dag plan");
    final int taskCount = 1;
    final Resource taskResource = Resource.newInstance(1, 1);

    // Three identical single-task vertices; only the names differ.
    org.apache.tez.dag.api.Vertex vertex1 = org.apache.tez.dag.api.Vertex.create("vertex1", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    org.apache.tez.dag.api.Vertex vertex2 = org.apache.tez.dag.api.Vertex.create("vertex2", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    org.apache.tez.dag.api.Vertex vertex3 = org.apache.tez.dag.api.Vertex.create("vertex3", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    DAG testDag = DAG.create("testDag");

    // Committer for the shared vertex-group sink.
    OutputCommitterDescriptor groupCommitter = OutputCommitterDescriptor.create(CountingOutputCommitter.class.getName());
    byte[] groupCommitterConfig = new CountingOutputCommitter.CountingOutputCommitterConfig(!vertexGroupCommitSucceeded, true).toUserPayload();
    groupCommitter.setUserPayload(UserPayload.create(ByteBuffer.wrap(groupCommitterConfig)));

    // Committer for the vertex3 sink.
    OutputCommitterDescriptor v3Committer = OutputCommitterDescriptor.create(CountingOutputCommitter.class.getName());
    byte[] v3CommitterConfig = new CountingOutputCommitter.CountingOutputCommitterConfig(!v3CommitSucceeded, true).toUserPayload();
    v3Committer.setUserPayload(UserPayload.create(ByteBuffer.wrap(v3CommitterConfig)));

    org.apache.tez.dag.api.VertexGroup vertexGroup = testDag.createVertexGroup("uv12", vertex1, vertex2);
    OutputDescriptor sinkOutput = OutputDescriptor.create("output.class");
    vertexGroup.addDataSink("v12Out", DataSinkDescriptor.create(sinkOutput, groupCommitter, null));
    vertex3.addDataSink("v3Out", DataSinkDescriptor.create(sinkOutput, v3Committer, null));

    // Single group edge: (vertex1, vertex2) -> vertex3.
    EdgeProperty edgeProperty = EdgeProperty.create(
            DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            OutputDescriptor.create("dummy output class"),
            InputDescriptor.create("dummy input class"));
    GroupInputEdge groupEdge = GroupInputEdge.create(vertexGroup, vertex3, edgeProperty, InputDescriptor.create("merge.class"));

    testDag.addVertex(vertex1);
    testDag.addVertex(vertex2);
    testDag.addVertex(vertex3);
    testDag.addEdge(groupEdge);
    return testDag.createDag(conf, null, null, null, true);
}
Also used : OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Resource(org.apache.hadoop.yarn.api.records.Resource) DAG(org.apache.tez.dag.api.DAG) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge)

Example 3 with OutputCommitterDescriptor

Use of org.apache.tez.dag.api.OutputCommitterDescriptor in the Apache Tez project.

Class TestCommit, method createDAGPlanWith2VertexGroupOutputs.

/**
 * Builds a DAG plan in which the vertex group owns two shared outputs. Topology:
 * <pre>
 *   v1 -> v3
 *   v2 -> v3
 *   vertex_group (v1, v2) has 2 shared outputs
 * </pre>
 * The group sinks are "v12Out1" and "v12Out2"; vertex3 has the sink "v3Out". Every sink is paired
 * with a {@code CountingOutputCommitter} descriptor whose config receives the negated success
 * flag as its first argument and {@code true} as its second.
 *
 * @param vertexGroupCommitSucceeded1 flag whose negation configures the committer of "v12Out1"
 * @param vertexGroupCommitSucceeded2 flag whose negation configures the committer of "v12Out2"
 * @param v3CommitSucceeded flag whose negation configures the committer of "v3Out"
 * @return the plan produced by {@code DAG#createDag}
 * @throws Exception if building the committer payloads or the plan fails
 */
private DAGPlan createDAGPlanWith2VertexGroupOutputs(boolean vertexGroupCommitSucceeded1, boolean vertexGroupCommitSucceeded2, boolean v3CommitSucceeded) throws Exception {
    LOG.info("Setting up group dag plan");
    final int taskCount = 1;
    final Resource taskResource = Resource.newInstance(1, 1);

    // Three identical single-task vertices; only the names differ.
    org.apache.tez.dag.api.Vertex vertex1 = org.apache.tez.dag.api.Vertex.create("vertex1", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    org.apache.tez.dag.api.Vertex vertex2 = org.apache.tez.dag.api.Vertex.create("vertex2", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    org.apache.tez.dag.api.Vertex vertex3 = org.apache.tez.dag.api.Vertex.create("vertex3", ProcessorDescriptor.create("Processor"), taskCount, taskResource);
    DAG testDag = DAG.create("testDag");

    // One committer per sink, in the order: first group sink, second group sink, vertex3 sink.
    OutputCommitterDescriptor firstGroupCommitter = OutputCommitterDescriptor.create(CountingOutputCommitter.class.getName());
    byte[] firstGroupConfig = new CountingOutputCommitter.CountingOutputCommitterConfig(!vertexGroupCommitSucceeded1, true).toUserPayload();
    firstGroupCommitter.setUserPayload(UserPayload.create(ByteBuffer.wrap(firstGroupConfig)));

    OutputCommitterDescriptor secondGroupCommitter = OutputCommitterDescriptor.create(CountingOutputCommitter.class.getName());
    byte[] secondGroupConfig = new CountingOutputCommitter.CountingOutputCommitterConfig(!vertexGroupCommitSucceeded2, true).toUserPayload();
    secondGroupCommitter.setUserPayload(UserPayload.create(ByteBuffer.wrap(secondGroupConfig)));

    OutputCommitterDescriptor v3Committer = OutputCommitterDescriptor.create(CountingOutputCommitter.class.getName());
    byte[] v3CommitterConfig = new CountingOutputCommitter.CountingOutputCommitterConfig(!v3CommitSucceeded, true).toUserPayload();
    v3Committer.setUserPayload(UserPayload.create(ByteBuffer.wrap(v3CommitterConfig)));

    org.apache.tez.dag.api.VertexGroup vertexGroup = testDag.createVertexGroup("uv12", vertex1, vertex2);
    OutputDescriptor sinkOutput = OutputDescriptor.create("output.class");
    vertexGroup.addDataSink("v12Out1", DataSinkDescriptor.create(sinkOutput, firstGroupCommitter, null));
    vertexGroup.addDataSink("v12Out2", DataSinkDescriptor.create(sinkOutput, secondGroupCommitter, null));
    vertex3.addDataSink("v3Out", DataSinkDescriptor.create(sinkOutput, v3Committer, null));

    // Single group edge: (vertex1, vertex2) -> vertex3.
    EdgeProperty edgeProperty = EdgeProperty.create(
            DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            OutputDescriptor.create("dummy output class"),
            InputDescriptor.create("dummy input class"));
    GroupInputEdge groupEdge = GroupInputEdge.create(vertexGroup, vertex3, edgeProperty, InputDescriptor.create("merge.class"));

    testDag.addVertex(vertex1);
    testDag.addVertex(vertex2);
    testDag.addVertex(vertex3);
    testDag.addEdge(groupEdge);
    return testDag.createDag(conf, null, null, null, true);
}
Also used : OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Resource(org.apache.hadoop.yarn.api.records.Resource) DAG(org.apache.tez.dag.api.DAG) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge)

Example 4 with OutputCommitterDescriptor

Use of org.apache.tez.dag.api.OutputCommitterDescriptor in the Apache Tez project.

Class TestMockDAGAppMaster, method createOutputCommitterDesc.

/**
 * Creates a descriptor for {@code FailingOutputCommitter} whose user payload is the serialized
 * {@code FailingOutputCommitterConfig} built from the given flag.
 *
 * @param failOnCommit value handed to the {@code FailingOutputCommitterConfig} constructor
 * @return the committer descriptor with the payload attached
 */
private OutputCommitterDescriptor createOutputCommitterDesc(boolean failOnCommit) {
    FailingOutputCommitter.FailingOutputCommitterConfig committerConfig = new FailingOutputCommitter.FailingOutputCommitterConfig(failOnCommit);
    ByteBuffer serializedConfig = ByteBuffer.wrap(committerConfig.toUserPayload());
    return OutputCommitterDescriptor.create(FailingOutputCommitter.class.getName())
            .setUserPayload(UserPayload.create(serializedConfig));
}
Also used : UserPayload(org.apache.tez.dag.api.UserPayload) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor)

Example 5 with OutputCommitterDescriptor

Use of org.apache.tez.dag.api.OutputCommitterDescriptor in the Apache Tez project.

Class TestDAGUtils, method createDAG.

/**
 * Builds a plan with three vertices (vertex1, vertex2, vertex3) in which the vertex group
 * (vertex1, vertex2) feeds vertex3 through a single group edge.
 *
 * <p>History text is set on every processor, input and output descriptor created here.
 * vertex1 additionally has a data source named "input1", and both the vertex group and vertex3
 * register a data sink named "uvOut" built from the same output and committer descriptors.
 *
 * @return the plan produced by {@code DAG#createDag} with a fresh, defaults-free configuration
 */
@SuppressWarnings("deprecation")
private DAGPlan createDAG() {
    final Configuration planConf = new Configuration(false);
    final int taskCount = 1;
    final Resource taskResource = Resource.newInstance(1, 1);

    // vertex1 is the only vertex with a data source attached.
    org.apache.tez.dag.api.Vertex vertex1 = Vertex.create("vertex1", ProcessorDescriptor.create("Processor").setHistoryText("vertex1 Processor HistoryText"), taskCount, taskResource);
    InputDescriptor sourceInput = InputDescriptor.create("input.class").setHistoryText("input HistoryText");
    vertex1.addDataSource("input1", DataSourceDescriptor.create(sourceInput, null, null));
    org.apache.tez.dag.api.Vertex vertex2 = Vertex.create("vertex2", ProcessorDescriptor.create("Processor").setHistoryText("vertex2 Processor HistoryText"), taskCount, taskResource);
    org.apache.tez.dag.api.Vertex vertex3 = Vertex.create("vertex3", ProcessorDescriptor.create("Processor").setHistoryText("vertex3 Processor HistoryText"), taskCount, taskResource);

    DAG testDag = DAG.create("testDag");
    testDag.setCallerContext(CallerContext.create("context1", "callerId1", "callerType1", "desc1"));
    testDag.setDAGInfo("dagInfo");

    org.apache.tez.dag.api.VertexGroup vertexGroup = testDag.createVertexGroup("uv12", vertex1, vertex2);
    OutputDescriptor sinkOutput = OutputDescriptor.create("output.class").setHistoryText("uvOut HistoryText");
    OutputCommitterDescriptor sinkCommitter = OutputCommitterDescriptor.create(OutputCommitter.class.getName());
    // The group and vertex3 each get their own sink descriptor instance under the same sink name.
    vertexGroup.addDataSink("uvOut", DataSinkDescriptor.create(sinkOutput, sinkCommitter, null));
    vertex3.addDataSink("uvOut", DataSinkDescriptor.create(sinkOutput, sinkCommitter, null));

    // Single group edge: (vertex1, vertex2) -> vertex3.
    OutputDescriptor edgeOutput = OutputDescriptor.create("dummy output class").setHistoryText("Dummy History Text");
    InputDescriptor edgeInput = InputDescriptor.create("dummy input class").setHistoryText("Dummy History Text");
    EdgeProperty edgeProperty = EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, edgeOutput, edgeInput);
    InputDescriptor mergedInput = InputDescriptor.create("merge.class").setHistoryText("Merge HistoryText");
    GroupInputEdge groupEdge = GroupInputEdge.create(vertexGroup, vertex3, edgeProperty, mergedInput);

    testDag.addVertex(vertex1);
    testDag.addVertex(vertex2);
    testDag.addVertex(vertex3);
    testDag.addEdge(groupEdge);
    return testDag.createDag(planConf, null, null, null, true);
}
Also used : OutputCommitter(org.apache.tez.runtime.api.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Resource(org.apache.hadoop.yarn.api.records.Resource) DAG(org.apache.tez.dag.api.DAG) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge) Vertex(org.apache.tez.dag.api.Vertex)

Aggregations

OutputCommitterDescriptor (org.apache.tez.dag.api.OutputCommitterDescriptor): 11; OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor): 9; DAG (org.apache.tez.dag.api.DAG): 7; Resource (org.apache.hadoop.yarn.api.records.Resource): 5; GroupInputEdge (org.apache.tez.dag.api.GroupInputEdge): 4; Vertex (org.apache.tez.dag.api.Vertex): 4; UserPayload (org.apache.tez.dag.api.UserPayload): 3; Configuration (org.apache.hadoop.conf.Configuration): 2; DataSinkDescriptor (org.apache.tez.dag.api.DataSinkDescriptor): 2; DataSourceDescriptor (org.apache.tez.dag.api.DataSourceDescriptor): 2; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 2; CartesianProductConfig (org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig): 2; CartesianProductVertexManager (org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager): 2; ByteString (com.google.protobuf.ByteString): 1; ArrayList (java.util.ArrayList): 1; TreeMap (java.util.TreeMap): 1; ParseException (org.apache.commons.cli.ParseException): 1; FileStatus (org.apache.hadoop.fs.FileStatus): 1; FileSystem (org.apache.hadoop.fs.FileSystem): 1; Path (org.apache.hadoop.fs.Path): 1