Search in sources :

Example 11 with OutputDescriptor

use of org.apache.tez.dag.api.OutputDescriptor in project tez by apache.

the class TestDAGUtils method createDAG.

/**
 * Builds the DAG plan used as a fixture.
 *
 * <p>The DAG has three vertices ("vertex1", "vertex2", "vertex3"). "vertex1" and
 * "vertex2" form the vertex group "uv12", which is connected to "vertex3" through a
 * single scatter-gather group input edge. Both the group and "vertex3" get a data sink
 * named "uvOut" built from the same output and committer descriptors.
 *
 * @return the plan produced by {@code DAG.createDag} for the DAG described above
 */
@SuppressWarnings("deprecation")
private DAGPlan createDAG() {
    Configuration emptyConf = new Configuration(false);
    final int taskCount = 1;
    final Resource taskResource = Resource.newInstance(1, 1);

    // Topology: group(vertex1, vertex2) -> vertex3. Only vertex1 has a data source.
    org.apache.tez.dag.api.Vertex first = Vertex.create(
        "vertex1",
        ProcessorDescriptor.create("Processor").setHistoryText("vertex1 Processor HistoryText"),
        taskCount,
        taskResource);
    first.addDataSource(
        "input1",
        DataSourceDescriptor.create(
            InputDescriptor.create("input.class").setHistoryText("input HistoryText"), null, null));
    org.apache.tez.dag.api.Vertex second = Vertex.create(
        "vertex2",
        ProcessorDescriptor.create("Processor").setHistoryText("vertex2 Processor HistoryText"),
        taskCount,
        taskResource);
    org.apache.tez.dag.api.Vertex third = Vertex.create(
        "vertex3",
        ProcessorDescriptor.create("Processor").setHistoryText("vertex3 Processor HistoryText"),
        taskCount,
        taskResource);

    DAG testDag = DAG.create("testDag");
    testDag.setCallerContext(CallerContext.create("context1", "callerId1", "callerType1", "desc1"));
    testDag.setDAGInfo("dagInfo");

    org.apache.tez.dag.api.VertexGroup group = testDag.createVertexGroup("uv12", first, second);

    // The same sink descriptors are registered on the group and on vertex3.
    OutputDescriptor sinkOutput =
        OutputDescriptor.create("output.class").setHistoryText("uvOut HistoryText");
    OutputCommitterDescriptor sinkCommitter =
        OutputCommitterDescriptor.create(OutputCommitter.class.getName());
    group.addDataSink("uvOut", DataSinkDescriptor.create(sinkOutput, sinkCommitter, null));
    third.addDataSink("uvOut", DataSinkDescriptor.create(sinkOutput, sinkCommitter, null));

    EdgeProperty groupEdgeProperty = EdgeProperty.create(
        DataMovementType.SCATTER_GATHER,
        DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL,
        OutputDescriptor.create("dummy output class").setHistoryText("Dummy History Text"),
        InputDescriptor.create("dummy input class").setHistoryText("Dummy History Text"));
    InputDescriptor mergedInput =
        InputDescriptor.create("merge.class").setHistoryText("Merge HistoryText");
    GroupInputEdge groupEdge = GroupInputEdge.create(group, third, groupEdgeProperty, mergedInput);

    testDag.addVertex(first);
    testDag.addVertex(second);
    testDag.addVertex(third);
    testDag.addEdge(groupEdge);
    return testDag.createDag(emptyConf, null, null, null, true);
}
Also used : OutputCommitter(org.apache.tez.runtime.api.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) Resource(org.apache.hadoop.yarn.api.records.Resource) DAG(org.apache.tez.dag.api.DAG) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge) Vertex(org.apache.tez.dag.api.Vertex)

Example 12 with OutputDescriptor

use of org.apache.tez.dag.api.OutputDescriptor in project tez by apache.

the class YARNRunner method createVertexForStage.

/**
 * Creates the Tez vertex for one stage of a multi-stage MR job.
 *
 * <p>Stage 0 is treated as the map stage: it uses {@code MapProcessor}, the map task
 * count/resources/java opts, and gets an "MRInput" data source. Every other stage uses
 * {@code ReduceProcessor} with the reduce settings and a {@code ShuffleVertexManager}.
 * The last stage ({@code stageNum == totalStages - 1}) additionally gets an "MROutput"
 * data sink; for a single-stage job that is the map stage itself.
 *
 * <p>Note that this method writes {@code MRJobConfig.MROUTPUT_FILE_NAME_PREFIX} into
 * {@code stageConf} before serializing it into the vertex user payload.
 *
 * @param stageConf configuration of this stage; modified as described above
 * @param jobLocalResources local resources copied into the vertex's task local files
 * @param locations task location hints applied to the vertex
 * @param stageNum zero-based stage index, from 0 to {@code totalStages - 1}
 * @param totalStages total number of stages in the job
 * @return the configured vertex
 * @throws IOException declared by the signature; propagated from the helpers called here
 */
private Vertex createVertexForStage(Configuration stageConf, Map<String, LocalResource> jobLocalResources, List<TaskLocationHint> locations, int stageNum, int totalStages) throws IOException {
    // stageNum starts from 0, goes till numStages - 1
    final boolean isMap = (stageNum == 0);
    final boolean isLastStage = (stageNum == totalStages - 1);

    final int numTasks;
    final String processorName;
    final String vertexName;
    final Resource taskResource;
    if (isMap) {
        numTasks = stageConf.getInt(MRJobConfig.NUM_MAPS, 0);
        processorName = MapProcessor.class.getName();
        vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
        taskResource = MRHelpers.getResourceForMRMapper(stageConf);
    } else {
        numTasks = stageConf.getInt(MRJobConfig.NUM_REDUCES, 0);
        processorName = ReduceProcessor.class.getName();
        vertexName = isLastStage
            ? MultiStageMRConfigUtil.getFinalReduceVertexName()
            : MultiStageMRConfigUtil.getIntermediateStageVertexName(stageNum);
        taskResource = MRHelpers.getResourceForMRReducer(stageConf);
    }

    // The prefix must be in the conf before it is serialized into the payload.
    stageConf.set(MRJobConfig.MROUTPUT_FILE_NAME_PREFIX, "part");
    UserPayload vertexUserPayload = TezUtils.createUserPayloadFromConf(stageConf);

    Vertex vertex = Vertex.create(
        vertexName,
        ProcessorDescriptor.create(processorName).setUserPayload(vertexUserPayload),
        numTasks,
        taskResource);
    if (stageConf.getBoolean(
        TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
        TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
        vertex.getProcessorDescriptor().setHistoryText(TezUtils.convertToHistoryText(stageConf));
    }

    if (isMap) {
        vertex.addDataSource("MRInput", configureMRInputWithLegacySplitsGenerated(stageConf, true));
    }

    // The last stage writes the job output. For a map-only job this is the map stage.
    if (isLastStage) {
        OutputDescriptor outputDescriptor =
            OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexUserPayload);
        if (stageConf.getBoolean(
            TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
            TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
            outputDescriptor.setHistoryText(TezUtils.convertToHistoryText(stageConf));
        }
        OutputCommitterDescriptor committerDescriptor =
            OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
        vertex.addDataSink("MROutput", DataSinkDescriptor.create(outputDescriptor, committerDescriptor, null));
    }

    Map<String, String> taskEnv = new HashMap<String, String>();
    setupMapReduceEnv(stageConf, taskEnv, isMap);

    // PRECOMMIT Remove split localization for reduce tasks if it's being set
    // here
    Map<String, LocalResource> taskLocalResources = new TreeMap<String, LocalResource>();
    taskLocalResources.putAll(jobLocalResources);

    String taskJavaOpts;
    if (isMap) {
        taskJavaOpts = MRHelpers.getJavaOptsForMRMapper(stageConf);
    } else {
        taskJavaOpts = MRHelpers.getJavaOptsForMRReducer(stageConf);
    }

    vertex.setTaskEnvironment(taskEnv)
        .addTaskLocalFiles(taskLocalResources)
        .setLocationHint(VertexLocationHint.create(locations))
        .setTaskLaunchCmdOpts(taskJavaOpts);

    if (!isMap) {
        vertex.setVertexManagerPlugin(ShuffleVertexManager.createConfigBuilder(stageConf).build());
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Adding vertex to DAG"
            + ", vertexName=" + vertex.getName()
            + ", processor=" + vertex.getProcessorDescriptor().getClassName()
            + ", parallelism=" + vertex.getParallelism()
            + ", javaOpts=" + vertex.getTaskLaunchCmdOpts()
            + ", resources=" + vertex.getTaskResource());
    }
    return vertex;
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) UserPayload(org.apache.tez.dag.api.UserPayload) HashMap(java.util.HashMap) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) TreeMap(java.util.TreeMap) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) ReduceProcessor(org.apache.tez.mapreduce.processor.reduce.ReduceProcessor) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) MROutputCommitter(org.apache.tez.mapreduce.committer.MROutputCommitter) MapProcessor(org.apache.tez.mapreduce.processor.map.MapProcessor)

Example 13 with OutputDescriptor

use of org.apache.tez.dag.api.OutputDescriptor in project tez by apache.

the class ProtoConverters method getOutputSpecFromProto.

/**
 * Builds an {@link OutputSpec} from its protobuf form.
 *
 * @param outputSpecProto the serialized output spec
 * @return an output spec carrying the proto's connected vertex name and physical edge
 *         count; its descriptor is {@code null} when the proto has no IO descriptor
 */
public static OutputSpec getOutputSpecFromProto(IOSpecProto outputSpecProto) {
    // The IO descriptor is optional in the proto, so only convert it when present.
    final OutputDescriptor descriptor = outputSpecProto.hasIoDescriptor()
        ? DagTypeConverters.convertOutputDescriptorFromDAGPlan(outputSpecProto.getIoDescriptor())
        : null;
    return new OutputSpec(
        outputSpecProto.getConnectedVertexName(),
        descriptor,
        outputSpecProto.getPhysicalEdgeCount());
}
Also used : OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec)

Example 14 with OutputDescriptor

use of org.apache.tez.dag.api.OutputDescriptor in project tez by apache.

the class CartesianProduct method createDAG.

/**
 * Assembles the "CrossProduct" DAG.
 *
 * <p>{@code VERTEX1} and {@code VERTEX2} each run {@code TokenProcessor} over the same
 * fake data source, and both feed {@code VERTEX3}, which runs {@code JoinProcessor} and
 * writes to a fake data sink. The two edges share one custom edge property whose edge
 * manager is {@code CartesianProductEdgeManager}; {@code VERTEX3} is managed by
 * {@code CartesianProductVertexManager}. Both plugins receive the same user payload,
 * produced from a {@code CartesianProductConfig} over {@code sourceVertices}.
 *
 * @param tezConf configuration passed to {@code CartesianProductConfig.toUserPayload}
 * @return the assembled DAG with three vertices and two edges
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
    // One data source descriptor is shared by both token vertices.
    DataSourceDescriptor sharedSource = DataSourceDescriptor.create(
        InputDescriptor.create(FakeInput.class.getName()),
        InputInitializerDescriptor.create(FakeInputInitializer.class.getName()),
        null);
    Vertex leftSource = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
    leftSource.addDataSource(INPUT, sharedSource);
    Vertex rightSource = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
    rightSource.addDataSource(INPUT, sharedSource);

    DataSinkDescriptor sink = DataSinkDescriptor.create(
        OutputDescriptor.create(FakeOutput.class.getName()),
        OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName()),
        null);

    // The vertex manager and the edge manager are configured from the same payload.
    CartesianProductConfig productConfig = new CartesianProductConfig(Arrays.asList(sourceVertices));
    UserPayload productPayload = productConfig.toUserPayload(tezConf);

    Vertex joiner = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
    joiner.addDataSink(OUTPUT, sink);
    joiner.setVertexManagerPlugin(
        VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
            .setUserPayload(productPayload));

    EdgeManagerPluginDescriptor edgeManager =
        EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName())
            .setUserPayload(productPayload);
    UnorderedPartitionedKVEdgeConfig kvEdgeConfig = UnorderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(), RoundRobinPartitioner.class.getName())
        .build();
    EdgeProperty crossEdge = kvEdgeConfig.createDefaultCustomEdgeProperty(edgeManager);

    DAG withVertices = DAG.create("CrossProduct")
        .addVertex(leftSource)
        .addVertex(rightSource)
        .addVertex(joiner);
    return withVertices
        .addEdge(Edge.create(leftSource, joiner, crossEdge))
        .addEdge(Edge.create(rightSource, joiner, crossEdge));
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Vertex(org.apache.tez.dag.api.Vertex) UserPayload(org.apache.tez.dag.api.UserPayload) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) CartesianProductVertexManager(org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) CartesianProductEdgeManager(org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) UnorderedPartitionedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) CartesianProductConfig(org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor)

Example 15 with OutputDescriptor

use of org.apache.tez.dag.api.OutputDescriptor in project hive by apache.

the class Converters method convertOutputDescriptorFromProto.

/**
 * Rebuilds an {@link OutputDescriptor} from its protobuf form.
 *
 * @param proto the serialized descriptor; supplies the class name and the payload
 * @return a descriptor created for the proto's class name, with the converted payload
 *         applied through {@code setUserPayload}
 */
private static OutputDescriptor convertOutputDescriptorFromProto(EntityDescriptorProto proto) {
    final String outputClassName = proto.getClassName();
    final UserPayload userPayload = convertPayloadFromProto(proto);

    final OutputDescriptor descriptor = OutputDescriptor.create(outputClassName);
    setUserPayload(descriptor, userPayload);
    return descriptor;
}
Also used : UserPayload(org.apache.tez.dag.api.UserPayload) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) ByteString(com.google.protobuf.ByteString)

Aggregations

OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor): 32, Test (org.junit.Test): 14, OutputContext (org.apache.tez.runtime.api.OutputContext): 13, InputDescriptor (org.apache.tez.dag.api.InputDescriptor): 10, UserPayload (org.apache.tez.dag.api.UserPayload): 10, Configuration (org.apache.hadoop.conf.Configuration): 9, OutputCommitterDescriptor (org.apache.tez.dag.api.OutputCommitterDescriptor): 9, TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 8, InputContext (org.apache.tez.runtime.api.InputContext): 8, DAG (org.apache.tez.dag.api.DAG): 7, Resource (org.apache.hadoop.yarn.api.records.Resource): 6, Path (org.apache.hadoop.fs.Path): 5, DataSinkDescriptor (org.apache.tez.dag.api.DataSinkDescriptor): 5, WeightedScalingMemoryDistributor (org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor): 5, GroupInputEdge (org.apache.tez.dag.api.GroupInputEdge): 4, ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor): 4, Vertex (org.apache.tez.dag.api.Vertex): 4, OutputSpec (org.apache.tez.runtime.api.impl.OutputSpec): 4, ByteString (com.google.protobuf.ByteString): 3, JobConf (org.apache.hadoop.mapred.JobConf): 3