use of org.apache.tez.dag.api.OutputDescriptor in project tez by apache.
the class TestDAGUtils method createDAG.
/**
 * Builds the DAG plan used as a fixture: three vertices where "vertex1" and "vertex2"
 * are combined into the vertex group "uv12", and that group feeds "vertex3" through a
 * single group input edge.
 *
 * @return the plan returned by {@code createDag} for the assembled DAG
 */
@SuppressWarnings("deprecation")
private DAGPlan createDAG() {
    final int taskCount = 1;
    final Resource taskResource = Resource.newInstance(1, 1);

    // vertex1 is the only vertex that is given a data source.
    ProcessorDescriptor processorA = ProcessorDescriptor.create("Processor").setHistoryText("vertex1 Processor HistoryText");
    org.apache.tez.dag.api.Vertex vertexA = Vertex.create("vertex1", processorA, taskCount, taskResource);
    InputDescriptor sourceInput = InputDescriptor.create("input.class").setHistoryText("input HistoryText");
    vertexA.addDataSource("input1", DataSourceDescriptor.create(sourceInput, null, null));

    ProcessorDescriptor processorB = ProcessorDescriptor.create("Processor").setHistoryText("vertex2 Processor HistoryText");
    org.apache.tez.dag.api.Vertex vertexB = Vertex.create("vertex2", processorB, taskCount, taskResource);

    ProcessorDescriptor processorC = ProcessorDescriptor.create("Processor").setHistoryText("vertex3 Processor HistoryText");
    org.apache.tez.dag.api.Vertex vertexC = Vertex.create("vertex3", processorC, taskCount, taskResource);

    DAG dag = DAG.create("testDag");
    dag.setCallerContext(CallerContext.create("context1", "callerId1", "callerType1", "desc1"));
    dag.setDAGInfo("dagInfo");

    // Group vertex1 and vertex2; the group is created before the vertices are added to the DAG.
    org.apache.tez.dag.api.VertexGroup group = dag.createVertexGroup("uv12", vertexA, vertexB);

    // The same output/committer descriptors back a "uvOut" sink on both the group and vertex3;
    // each sink gets its own DataSinkDescriptor instance.
    OutputDescriptor sinkOutput = OutputDescriptor.create("output.class").setHistoryText("uvOut HistoryText");
    OutputCommitterDescriptor sinkCommitter = OutputCommitterDescriptor.create(OutputCommitter.class.getName());
    group.addDataSink("uvOut", DataSinkDescriptor.create(sinkOutput, sinkCommitter, null));
    vertexC.addDataSink("uvOut", DataSinkDescriptor.create(sinkOutput, sinkCommitter, null));

    // Edge from the group to vertex3, with a merged input on the destination side.
    OutputDescriptor edgeOutput = OutputDescriptor.create("dummy output class").setHistoryText("Dummy History Text");
    InputDescriptor edgeInput = InputDescriptor.create("dummy input class").setHistoryText("Dummy History Text");
    EdgeProperty edgeProperty = EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, edgeOutput, edgeInput);
    InputDescriptor mergedInput = InputDescriptor.create("merge.class").setHistoryText("Merge HistoryText");
    GroupInputEdge groupEdge = GroupInputEdge.create(group, vertexC, edgeProperty, mergedInput);

    dag.addVertex(vertexA);
    dag.addVertex(vertexB);
    dag.addVertex(vertexC);
    dag.addEdge(groupEdge);

    Configuration planConf = new Configuration(false);
    return dag.createDag(planConf, null, null, null, true);
}
use of org.apache.tez.dag.api.OutputDescriptor in project tez by apache.
the class YARNRunner method createVertexForStage.
/**
 * Creates the Tez vertex for a single stage of an MR job.
 *
 * <p>Stage 0 is configured as the map stage (map processor, "MRInput" data source); every
 * other stage is configured as a reduce stage and gets a shuffle vertex manager. The last
 * stage additionally receives the "MROutput" data sink.
 *
 * @param stageConf configuration of this stage; the output file name prefix is set on it
 * @param jobLocalResources local resources copied into the vertex's task local files
 * @param locations location hints applied to the vertex
 * @param stageNum zero-based index of the stage
 * @param totalStages total number of stages in the job
 * @return the configured vertex
 * @throws IOException declared by the signature; raised by the helpers invoked here
 */
private Vertex createVertexForStage(Configuration stageConf, Map<String, LocalResource> jobLocalResources, List<TaskLocationHint> locations, int stageNum, int totalStages) throws IOException {
    // stageNum starts from 0, goes till numStages - 1
    final boolean isMap = (stageNum == 0);
    final boolean isLastStage = (stageNum == totalStages - 1);

    final int numTasks;
    final String processorName;
    final String vertexName;
    final Resource taskResource;
    if (isMap) {
        numTasks = stageConf.getInt(MRJobConfig.NUM_MAPS, 0);
        processorName = MapProcessor.class.getName();
        vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
        taskResource = MRHelpers.getResourceForMRMapper(stageConf);
    } else {
        numTasks = stageConf.getInt(MRJobConfig.NUM_REDUCES, 0);
        processorName = ReduceProcessor.class.getName();
        vertexName = isLastStage
            ? MultiStageMRConfigUtil.getFinalReduceVertexName()
            : MultiStageMRConfigUtil.getIntermediateStageVertexName(stageNum);
        taskResource = MRHelpers.getResourceForMRReducer(stageConf);
    }

    // The prefix must be set before the configuration is serialized into the payload.
    stageConf.set(MRJobConfig.MROUTPUT_FILE_NAME_PREFIX, "part");
    UserPayload vertexUserPayload = TezUtils.createUserPayloadFromConf(stageConf);

    ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create(processorName).setUserPayload(vertexUserPayload);
    Vertex vertex = Vertex.create(vertexName, processorDescriptor, numTasks, taskResource);
    if (stageConf.getBoolean(
            TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
            TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
        vertex.getProcessorDescriptor().setHistoryText(TezUtils.convertToHistoryText(stageConf));
    }

    if (isMap) {
        vertex.addDataSource("MRInput", configureMRInputWithLegacySplitsGenerated(stageConf, true));
    }

    // The last stage writes the job output; in a map-only job that is the map stage itself.
    if (isLastStage) {
        OutputDescriptor outputDescriptor = OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(vertexUserPayload);
        // The flag is read again here rather than reused from above.
        if (stageConf.getBoolean(
                TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
                TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
            outputDescriptor.setHistoryText(TezUtils.convertToHistoryText(stageConf));
        }
        OutputCommitterDescriptor committerDescriptor = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
        vertex.addDataSink("MROutput", DataSinkDescriptor.create(outputDescriptor, committerDescriptor, null));
    }

    Map<String, String> environment = new HashMap<String, String>();
    setupMapReduceEnv(stageConf, environment, isMap);

    // PRECOMMIT Remove split localization for reduce tasks if it's being set
    // here
    Map<String, LocalResource> localFiles = new TreeMap<String, LocalResource>();
    localFiles.putAll(jobLocalResources);

    final String javaOpts;
    if (isMap) {
        javaOpts = MRHelpers.getJavaOptsForMRMapper(stageConf);
    } else {
        javaOpts = MRHelpers.getJavaOptsForMRReducer(stageConf);
    }

    vertex.setTaskEnvironment(environment)
        .addTaskLocalFiles(localFiles)
        .setLocationHint(VertexLocationHint.create(locations))
        .setTaskLaunchCmdOpts(javaOpts);

    if (!isMap) {
        vertex.setVertexManagerPlugin((ShuffleVertexManager.createConfigBuilder(stageConf).build()));
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Adding vertex to DAG"
            + ", vertexName=" + vertex.getName()
            + ", processor=" + vertex.getProcessorDescriptor().getClassName()
            + ", parallelism=" + vertex.getParallelism()
            + ", javaOpts=" + vertex.getTaskLaunchCmdOpts()
            + ", resources=" + vertex.getTaskResource());
    }
    return vertex;
}
use of org.apache.tez.dag.api.OutputDescriptor in project tez by apache.
the class ProtoConverters method getOutputSpecFromProto.
/**
 * Converts an {@code IOSpecProto} describing an output into an {@code OutputSpec}.
 *
 * @param outputSpecProto the protobuf message to convert
 * @return a spec carrying the connected vertex name, the physical edge count and the
 *         converted output descriptor; the descriptor is {@code null} when the proto
 *         has no IO descriptor set
 */
public static OutputSpec getOutputSpecFromProto(IOSpecProto outputSpecProto) {
    final OutputDescriptor descriptor = outputSpecProto.hasIoDescriptor()
        ? DagTypeConverters.convertOutputDescriptorFromDAGPlan(outputSpecProto.getIoDescriptor())
        : null;
    return new OutputSpec(
        outputSpecProto.getConnectedVertexName(),
        descriptor,
        outputSpecProto.getPhysicalEdgeCount());
}
use of org.apache.tez.dag.api.OutputDescriptor in project tez by apache.
the class CartesianProduct method createDAG.
/**
 * Assembles the "CrossProduct" DAG: two token-processing source vertices that share one
 * data source descriptor, and a join vertex that consumes both through custom edges
 * managed by the cartesian product edge manager.
 *
 * @param tezConf configuration used when serializing the cartesian product config
 * @return the assembled DAG
 * @throws IOException declared by the signature; raised by the calls made here
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
    // Both source vertices read through the same data source descriptor.
    InputDescriptor fakeInput = InputDescriptor.create(FakeInput.class.getName());
    InputInitializerDescriptor fakeInitializer = InputInitializerDescriptor.create(FakeInputInitializer.class.getName());
    DataSourceDescriptor sharedSource = DataSourceDescriptor.create(fakeInput, fakeInitializer, null);

    Vertex firstSource = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
    firstSource.addDataSource(INPUT, sharedSource);
    Vertex secondSource = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
    secondSource.addDataSource(INPUT, sharedSource);

    // Sink attached to the join vertex.
    OutputDescriptor fakeOutput = OutputDescriptor.create(FakeOutput.class.getName());
    OutputCommitterDescriptor fakeCommitter = OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName());
    DataSinkDescriptor sink = DataSinkDescriptor.create(fakeOutput, fakeCommitter, null);

    // The same serialized config is handed to both the vertex manager and the edge manager.
    CartesianProductConfig productConfig = new CartesianProductConfig(Arrays.asList(sourceVertices));
    UserPayload productPayload = productConfig.toUserPayload(tezConf);

    Vertex joinVertex = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
    joinVertex.addDataSink(OUTPUT, sink);
    VertexManagerPluginDescriptor vertexManager = VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName()).setUserPayload(productPayload);
    joinVertex.setVertexManagerPlugin(vertexManager);

    EdgeManagerPluginDescriptor edgeManager = EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
    edgeManager.setUserPayload(productPayload);

    UnorderedPartitionedKVEdgeConfig kvEdgeConfig = UnorderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(), RoundRobinPartitioner.class.getName())
        .build();
    EdgeProperty customEdge = kvEdgeConfig.createDefaultCustomEdgeProperty(edgeManager);

    return DAG.create("CrossProduct")
        .addVertex(firstSource)
        .addVertex(secondSource)
        .addVertex(joinVertex)
        .addEdge(Edge.create(firstSource, joinVertex, customEdge))
        .addEdge(Edge.create(secondSource, joinVertex, customEdge));
}
use of org.apache.tez.dag.api.OutputDescriptor in project hive by apache.
the class Converters method convertOutputDescriptorFromProto.
/**
 * Builds an {@code OutputDescriptor} from its protobuf form.
 *
 * @param proto the entity descriptor message supplying the class name and payload
 * @return a descriptor created for the proto's class name, with the converted payload
 *         applied through {@code setUserPayload}
 */
private static OutputDescriptor convertOutputDescriptorFromProto(EntityDescriptorProto proto) {
    final OutputDescriptor descriptor = OutputDescriptor.create(proto.getClassName());
    setUserPayload(descriptor, convertPayloadFromProto(proto));
    return descriptor;
}
Aggregations