use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.
the class DAG method createDag.
/**
 * Creates the protobuf {@link DAGPlan} message describing this DAG.
 *
 * <p>The method calls {@code verify(true)} and {@code verifyLocalResources(tezConf)} up front.
 * Vertices are then added to the plan in the order in which their names are popped from the
 * stack returned by {@code verify(true)}, followed by the edges, the access controls, the DAG
 * configuration and finally the credentials collected along the way.
 *
 * <p>Credentials are accumulated in this order: {@code extraCredentials} (if any), the
 * credentials held by this DAG, then those of every data source and data sink of each vertex.
 *
 * @param tezConf configuration supplying the default task resource, the default task launch
 *        options, the default task environment and the fallback history log level
 * @param extraCredentials additional credentials to merge into the plan; may be null
 * @param tezJarResources local resources added to every vertex; may be null
 * @param binaryConfig local resource registered for every vertex under
 *        {@code TezConstants.TEZ_PB_BINARY_CONF_NAME}; may be null
 * @param tezLrsAsArchive forwarded to {@code TezYARNUtils.setupDefaultEnv}
 * @param servicePluginsDescriptor used to verify the DAG default and per-vertex execution contexts
 * @param javaOptsChecker forwarded to {@code TezClientUtils.addDefaultsToTaskLaunchCmdOpts}
 * @return the fully built plan
 * @throws TezUncheckedException if the task launch options of a vertex are rejected, or if a
 *         task location hint specifies an affinitized task
 * @throws IllegalArgumentException if the configured history log level is invalid, or if the
 *         number of sorted vertex names differs from the number of vertices
 * @throws IllegalStateException if a vertex location hint is inconsistent with the vertex
 *         parallelism
 */
@Private
public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCredentials, Map<String, LocalResource> tezJarResources, LocalResource binaryConfig, boolean tezLrsAsArchive, ServicePluginsDescriptor servicePluginsDescriptor, JavaOptsChecker javaOptsChecker) {
  Deque<String> topologicalVertexStack = verify(true);
  verifyLocalResources(tezConf);

  DAGPlan.Builder dagBuilder = DAGPlan.newBuilder();
  dagBuilder.setName(this.name);
  if (this.callerContext != null) {
    dagBuilder.setCallerContext(DagTypeConverters.convertCallerContextToProto(callerContext));
  }
  if (this.dagInfo != null && !this.dagInfo.isEmpty()) {
    dagBuilder.setDagInfo(this.dagInfo);
  }

  // Setup default execution context.
  VertexExecutionContext defaultContext = getDefaultExecutionContext();
  verifyExecutionContext(defaultContext, servicePluginsDescriptor, "DAGDefault");
  if (defaultContext != null) {
    DAGProtos.VertexExecutionContextProto contextProto = DagTypeConverters.convertToProto(defaultContext);
    dagBuilder.setDefaultExecutionContext(contextProto);
  }

  // Vertex groups: name, member vertex names, outputs and the merged input per destination.
  for (VertexGroup av : vertexGroups) {
    GroupInfo groupInfo = av.getGroupInfo();
    PlanVertexGroupInfo.Builder groupBuilder = PlanVertexGroupInfo.newBuilder();
    groupBuilder.setGroupName(groupInfo.getGroupName());
    for (Vertex v : groupInfo.getMembers()) {
      groupBuilder.addGroupMembers(v.getName());
    }
    groupBuilder.addAllOutputs(groupInfo.outputs);
    for (Map.Entry<String, InputDescriptor> entry : groupInfo.edgeMergedInputs.entrySet()) {
      groupBuilder.addEdgeMergedInputs(PlanGroupInputEdgeInfo.newBuilder().setDestVertexName(entry.getKey()).setMergedInput(DagTypeConverters.convertToDAGPlan(entry.getValue())));
    }
    dagBuilder.addVertexGroups(groupBuilder);
  }

  // Start from the caller supplied credentials, then layer this DAG's own on top.
  Credentials dagCredentials = new Credentials();
  if (extraCredentials != null) {
    dagCredentials.mergeAll(extraCredentials);
  }
  dagCredentials.mergeAll(credentials);

  if (!commonTaskLocalFiles.isEmpty()) {
    dagBuilder.addAllLocalResource(DagTypeConverters.convertToDAGPlan(commonTaskLocalFiles));
  }

  Preconditions.checkArgument(topologicalVertexStack.size() == vertices.size(), "size of topologicalVertexStack is:" + topologicalVertexStack.size() + " while size of vertices is:" + vertices.size() + ", make sure they are the same in order to sort the vertices");
  while (!topologicalVertexStack.isEmpty()) {
    Vertex vertex = vertices.get(topologicalVertexStack.pop());

    // infer credentials, resources and parallelism from data source
    Resource vertexTaskResource = vertex.getTaskResource();
    if (vertexTaskResource == null) {
      vertexTaskResource = Resource.newInstance(tezConf.getInt(TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB, TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB_DEFAULT), tezConf.getInt(TezConfiguration.TEZ_TASK_RESOURCE_CPU_VCORES, TezConfiguration.TEZ_TASK_RESOURCE_CPU_VCORES_DEFAULT));
    }
    Map<String, LocalResource> vertexLRs = Maps.newHashMap();
    vertexLRs.putAll(vertex.getTaskLocalFiles());
    List<DataSourceDescriptor> dataSources = vertex.getDataSources();
    for (DataSourceDescriptor dataSource : dataSources) {
      if (dataSource.getCredentials() != null) {
        dagCredentials.addAll(dataSource.getCredentials());
      }
      if (dataSource.getAdditionalLocalFiles() != null) {
        TezCommonUtils.addAdditionalLocalResources(dataSource.getAdditionalLocalFiles(), vertexLRs, "Vertex " + vertex.getName());
      }
    }
    if (tezJarResources != null) {
      TezCommonUtils.addAdditionalLocalResources(tezJarResources, vertexLRs, "Vertex " + vertex.getName());
    }
    if (binaryConfig != null) {
      vertexLRs.put(TezConstants.TEZ_PB_BINARY_CONF_NAME, binaryConfig);
    }

    // A single data source may stand in for parallelism / location hint left unset on the vertex.
    int vertexParallelism = vertex.getParallelism();
    VertexLocationHint vertexLocationHint = vertex.getLocationHint();
    if (dataSources.size() == 1) {
      DataSourceDescriptor dataSource = dataSources.get(0);
      if (vertexParallelism == -1 && dataSource.getNumberOfShards() > -1) {
        vertexParallelism = dataSource.getNumberOfShards();
      }
      if (vertexLocationHint == null && dataSource.getLocationHint() != null) {
        vertexLocationHint = dataSource.getLocationHint();
      }
    }
    if (vertexParallelism == -1) {
      Preconditions.checkState(vertexLocationHint == null, "Cannot specify vertex location hint without specifying vertex parallelism. Vertex: " + vertex.getName());
    } else if (vertexLocationHint != null && vertexLocationHint.getTaskLocationHints() != null) {
      // A hint whose task list is null has no per-task entries to compare against the
      // parallelism; the conversion further down skips such a hint as well, so do not
      // dereference the list here.
      Preconditions.checkState(vertexParallelism == vertexLocationHint.getTaskLocationHints().size(), "vertex task location hint must equal vertex parallelism. Vertex: " + vertex.getName());
    }

    for (DataSinkDescriptor dataSink : vertex.getDataSinks()) {
      if (dataSink.getCredentials() != null) {
        dagCredentials.addAll(dataSink.getCredentials());
      }
    }

    VertexPlan.Builder vertexBuilder = VertexPlan.newBuilder();
    vertexBuilder.setName(vertex.getName());
    // vertex type is implicitly NORMAL until TEZ-46.
    vertexBuilder.setType(PlanVertexType.NORMAL);
    vertexBuilder.setProcessorDescriptor(DagTypeConverters.convertToDAGPlan(vertex.getProcessorDescriptor()));

    // Vertex ExecutionContext setup
    VertexExecutionContext execContext = vertex.getVertexExecutionContext();
    verifyExecutionContext(execContext, servicePluginsDescriptor, vertex.getName());
    if (execContext != null) {
      DAGProtos.VertexExecutionContextProto contextProto = DagTypeConverters.convertToProto(execContext);
      vertexBuilder.setExecutionContext(contextProto);
    }

    for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> input : vertex.getInputs()) {
      vertexBuilder.addInputs(DagTypeConverters.convertToDAGPlan(input));
    }
    for (RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor> output : vertex.getOutputs()) {
      vertexBuilder.addOutputs(DagTypeConverters.convertToDAGPlan(output));
    }

    if (vertex.getConf() != null && vertex.getConf().size() > 0) {
      ConfigurationProto.Builder confBuilder = ConfigurationProto.newBuilder();
      TezUtils.populateConfProtoFromEntries(vertex.getConf().entrySet(), confBuilder);
      vertexBuilder.setVertexConf(confBuilder);
    }

    // task config
    PlanTaskConfiguration.Builder taskConfigBuilder = PlanTaskConfiguration.newBuilder();
    taskConfigBuilder.setNumTasks(vertexParallelism);
    taskConfigBuilder.setMemoryMb(vertexTaskResource.getMemory());
    taskConfigBuilder.setVirtualCores(vertexTaskResource.getVirtualCores());
    try {
      taskConfigBuilder.setJavaOpts(TezClientUtils.addDefaultsToTaskLaunchCmdOpts(vertex.getTaskLaunchCmdOpts(), tezConf, javaOptsChecker));
    } catch (TezException e) {
      throw new TezUncheckedException("Invalid TaskLaunchCmdOpts defined for Vertex " + vertex.getName() + " : " + e.getMessage(), e);
    }
    taskConfigBuilder.setTaskModule(vertex.getName());
    if (!vertexLRs.isEmpty()) {
      taskConfigBuilder.addAllLocalResource(DagTypeConverters.convertToDAGPlan(vertexLRs));
    }

    // Work on a copy so the defaults filled in below never leak back into the vertex.
    Map<String, String> taskEnv = Maps.newHashMap(vertex.getTaskEnvironment());
    TezYARNUtils.setupDefaultEnv(taskEnv, tezConf, TezConfiguration.TEZ_TASK_LAUNCH_ENV, TezConfiguration.TEZ_TASK_LAUNCH_ENV_DEFAULT, TezConfiguration.TEZ_TASK_LAUNCH_CLUSTER_DEFAULT_ENV, TezConfiguration.TEZ_TASK_LAUNCH_CLUSTER_DEFAULT_ENV_DEFAULT, tezLrsAsArchive);
    for (Map.Entry<String, String> entry : taskEnv.entrySet()) {
      PlanKeyValuePair.Builder envSettingBuilder = PlanKeyValuePair.newBuilder();
      envSettingBuilder.setKey(entry.getKey());
      envSettingBuilder.setValue(entry.getValue());
      taskConfigBuilder.addEnvironmentSetting(envSettingBuilder);
    }

    if (vertexLocationHint != null && vertexLocationHint.getTaskLocationHints() != null) {
      for (TaskLocationHint hint : vertexLocationHint.getTaskLocationHints()) {
        PlanTaskLocationHint.Builder taskLocationHintBuilder = PlanTaskLocationHint.newBuilder();
        // we can allow this later on if needed
        if (hint.getAffinitizedTask() != null) {
          throw new TezUncheckedException("Task based affinity may not be specified via the DAG API");
        }
        if (hint.getHosts() != null) {
          taskLocationHintBuilder.addAllHost(hint.getHosts());
        }
        if (hint.getRacks() != null) {
          taskLocationHintBuilder.addAllRack(hint.getRacks());
        }
        vertexBuilder.addTaskLocationHint(taskLocationHintBuilder);
      }
    }

    if (vertex.getVertexManagerPlugin() != null) {
      vertexBuilder.setVertexManagerPlugin(DagTypeConverters.convertToDAGPlan(vertex.getVertexManagerPlugin()));
    }
    for (Edge inEdge : vertex.getInputEdges()) {
      vertexBuilder.addInEdgeId(inEdge.getId());
    }
    for (Edge outEdge : vertex.getOutputEdges()) {
      vertexBuilder.addOutEdgeId(outEdge.getId());
    }
    vertexBuilder.setTaskConfig(taskConfigBuilder);
    dagBuilder.addVertex(vertexBuilder);
  }

  for (Edge edge : edges) {
    EdgePlan.Builder edgeBuilder = EdgePlan.newBuilder();
    edgeBuilder.setId(edge.getId());
    edgeBuilder.setInputVertexName(edge.getInputVertex().getName());
    edgeBuilder.setOutputVertexName(edge.getOutputVertex().getName());
    edgeBuilder.setDataMovementType(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getDataMovementType()));
    edgeBuilder.setDataSourceType(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getDataSourceType()));
    edgeBuilder.setSchedulingType(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getSchedulingType()));
    edgeBuilder.setEdgeSource(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getEdgeSource()));
    edgeBuilder.setEdgeDestination(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getEdgeDestination()));
    if (edge.getEdgeProperty().getDataMovementType() == DataMovementType.CUSTOM) {
      if (edge.getEdgeProperty().getEdgeManagerDescriptor() != null) {
        edgeBuilder.setEdgeManager(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getEdgeManagerDescriptor()));
      }
      // else the AM will deal with this.
    }
    dagBuilder.addEdge(edgeBuilder);
  }

  if (dagAccessControls != null) {
    dagBuilder.setAclInfo(DagTypeConverters.convertDAGAccessControlsToProto(dagAccessControls));
  }

  ConfigurationProto.Builder confProtoBuilder = ConfigurationProto.newBuilder();
  if (!this.dagConf.isEmpty()) {
    TezUtils.populateConfProtoFromEntries(this.dagConf.entrySet(), confProtoBuilder);
  }
  // The history log level set on the DAG wins and only needs validating, since it was already
  // copied with the rest of dagConf above. Otherwise fall back to tezConf and, when it is set
  // there, copy it into the plan's configuration.
  String logLevel = this.dagConf.get(TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL);
  if (logLevel != null) {
    validateHistoryLogLevel(logLevel);
  } else {
    logLevel = tezConf.get(TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL);
    if (logLevel != null) {
      validateHistoryLogLevel(logLevel);
      PlanKeyValuePair.Builder kvp = PlanKeyValuePair.newBuilder();
      kvp.setKey(TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL);
      kvp.setValue(logLevel);
      confProtoBuilder.addConfKeyValues(kvp);
    }
  }
  dagBuilder.setDagConf(confProtoBuilder);

  // dagCredentials is always instantiated above, so it is attached unconditionally.
  dagBuilder.setCredentialsBinary(DagTypeConverters.convertCredentialsToProto(dagCredentials));
  TezCommonUtils.logCredentials(LOG, dagCredentials, "dag");

  return dagBuilder.build();
}

/**
 * Rejects a history logging level that {@code HistoryLogLevel.validateLogLevel} does not accept.
 *
 * @param logLevel the non-null configured value
 * @throws IllegalArgumentException if the value is not a valid history log level
 */
private static void validateHistoryLogLevel(String logLevel) {
  if (!HistoryLogLevel.validateLogLevel(logLevel)) {
    throw new IllegalArgumentException("Config: " + TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL + " is set to invalid value: " + logLevel);
  }
}
use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.
the class TestRecoveryService method testRecoveryFlushOnSummaryEvent.
/**
 * With both the unflushed-event limit and the flush interval set to "-1", checks how often
 * the summary and DAG recovery streams are hflushed after a DAG-submitted event, after a
 * DAG-commit-started event, and after the service is stopped.
 */
@Test(timeout = 5000)
public void testRecoveryFlushOnSummaryEvent() throws Exception {
  String[][] flushSettings = new String[][] {
      { TezConfiguration.DAG_RECOVERY_MAX_UNFLUSHED_EVENTS, "-1" },
      { TezConfiguration.DAG_RECOVERY_FLUSH_INTERVAL_SECS, "-1" } };
  setup(true, flushSettings);
  recoveryService.start();

  // Submission: both the summary stream and the DAG stream are expected to be flushed once.
  DAGPlan submittedPlan = DAGPlan.newBuilder().setName("test_dag").build();
  DAGSubmittedEvent submitted = new DAGSubmittedEvent(dagId, startTime, submittedPlan, appAttemptId, null, "nobody", conf, null, "default");
  recoveryService.handle(new DAGHistoryEvent(dagId, submitted));
  waitForDrain(-1);
  verify(summaryFos, times(1)).hflush();
  verify(dagFos, times(1)).hflush();

  // Commit start: the summary stream gets a second flush, the DAG stream count stays at one.
  DAGCommitStartedEvent commitStarted = new DAGCommitStartedEvent(dagId, startTime);
  recoveryService.handle(new DAGHistoryEvent(dagId, commitStarted));
  waitForDrain(-1);
  verify(summaryFos, times(2)).hflush();
  verify(dagFos, times(1)).hflush();

  // Stopping the service flushes the DAG stream once more.
  recoveryService.stop();
  verify(dagFos, times(2)).hflush();
}
use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.
the class TestVertexImpl method createDAGPlanForOneToOneSplit.
/**
 * Builds the "testVertexOneToOneSplit" plan: vertex1 fans out to vertex2 and vertex3 over
 * ONE_TO_ONE edges e1/e2, which both feed vertex4 over ONE_TO_ONE edges e3/e4.
 *
 * @param initializerClassName when non-null, vertex1 gets a root input "input1" controlled by
 *        this class and vertex1 through vertex4 are declared with -1 tasks
 * @param numTasks task count for vertex1 through vertex4 when no initializer is given
 * @param addNullEdge when true, adds single-task vertex5 and vertex6 reached from vertex1 and
 *        vertex4 over CUSTOM edges e5 and e6
 * @return the assembled plan
 */
private DAGPlan createDAGPlanForOneToOneSplit(String initializerClassName, int numTasks, boolean addNullEdge) {
  final boolean hasInitializer = initializerClassName != null;
  // An initializer overrides the requested task count for every vertex that shares it.
  final int sharedTasks = hasInitializer ? -1 : numTasks;

  VertexPlan.Builder vertex1 = VertexPlan.newBuilder()
      .setName("vertex1")
      .setType(PlanVertexType.NORMAL)
      .addOutEdgeId("e1")
      .addOutEdgeId("e2");
  if (addNullEdge) {
    vertex1.addOutEdgeId("e5");
  }
  if (hasInitializer) {
    vertex1.addInputs(RootInputLeafOutputProto.newBuilder()
        .setControllerDescriptor(TezEntityDescriptorProto.newBuilder().setClassName(initializerClassName))
        .setName("input1")
        .setIODescriptor(TezEntityDescriptorProto.newBuilder().setClassName("InputClazz").build())
        .build());
  }
  vertex1.setTaskConfig(oneToOneSplitTaskConfig(sharedTasks, "x1.y1"));
  VertexPlan vertex1Plan = vertex1.build();

  VertexPlan.Builder vertex4 = VertexPlan.newBuilder()
      .setName("vertex4")
      .setType(PlanVertexType.NORMAL)
      .setTaskConfig(oneToOneSplitTaskConfig(sharedTasks, "x4.y4"))
      .addInEdgeId("e3")
      .addInEdgeId("e4");
  if (addNullEdge) {
    vertex4.addOutEdgeId("e6");
  }
  VertexPlan vertex4Plan = vertex4.build();

  LOG.info("Setting up one to one dag plan");

  VertexPlan vertex2Plan = VertexPlan.newBuilder()
      .setName("vertex2")
      .setType(PlanVertexType.NORMAL)
      .setTaskConfig(oneToOneSplitTaskConfig(sharedTasks, "x2.y2"))
      .addInEdgeId("e1")
      .addOutEdgeId("e3")
      .build();
  VertexPlan vertex3Plan = VertexPlan.newBuilder()
      .setName("vertex3")
      .setType(PlanVertexType.NORMAL)
      .setTaskConfig(oneToOneSplitTaskConfig(sharedTasks, "x3.y3"))
      .addInEdgeId("e2")
      .addOutEdgeId("e4")
      .build();

  DAGPlan.Builder plan = DAGPlan.newBuilder().setName("testVertexOneToOneSplit");
  plan.addVertex(vertex1Plan);
  plan.addVertex(vertex2Plan);
  plan.addVertex(vertex3Plan);
  plan.addVertex(vertex4Plan);
  plan.addEdge(oneToOneSplitEdge("e1", "vertex1", "vertex2", "v1_v2", PlanEdgeDataMovementType.ONE_TO_ONE));
  plan.addEdge(oneToOneSplitEdge("e2", "vertex1", "vertex3", "v1_v3", PlanEdgeDataMovementType.ONE_TO_ONE));
  plan.addEdge(oneToOneSplitEdge("e3", "vertex2", "vertex4", "v2_v4", PlanEdgeDataMovementType.ONE_TO_ONE));
  plan.addEdge(oneToOneSplitEdge("e4", "vertex3", "vertex4", "v3_v4", PlanEdgeDataMovementType.ONE_TO_ONE));

  if (addNullEdge) {
    plan.addVertex(VertexPlan.newBuilder()
        .setName("vertex5")
        .setType(PlanVertexType.NORMAL)
        .setTaskConfig(oneToOneSplitTaskConfig(1, "x4.y4"))
        .addInEdgeId("e5")
        .build());
    plan.addVertex(VertexPlan.newBuilder()
        .setName("vertex6")
        .setType(PlanVertexType.NORMAL)
        .setTaskConfig(oneToOneSplitTaskConfig(1, "x4.y4"))
        .addInEdgeId("e6")
        .build());
    plan.addEdge(oneToOneSplitEdge("e5", "vertex1", "vertex5", "v1_v5", PlanEdgeDataMovementType.CUSTOM));
    plan.addEdge(oneToOneSplitEdge("e6", "vertex4", "vertex6", "v4_v6", PlanEdgeDataMovementType.CUSTOM));
  }
  return plan.build();
}

/** Task configuration shared by the one-to-one split vertices: 4 vcores, 1024 MB, empty java opts. */
private static PlanTaskConfiguration oneToOneSplitTaskConfig(int taskCount, String taskModule) {
  return PlanTaskConfiguration.newBuilder()
      .setNumTasks(taskCount)
      .setVirtualCores(4)
      .setMemoryMb(1024)
      .setJavaOpts("")
      .setTaskModule(taskModule)
      .build();
}

/** PERSISTED, SEQUENTIAL edge between two named vertices whose edge source class is always "o2". */
private static EdgePlan oneToOneSplitEdge(String edgeId, String inputVertex, String outputVertex, String destinationClassName, PlanEdgeDataMovementType movementType) {
  return EdgePlan.newBuilder()
      .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName(destinationClassName))
      .setInputVertexName(inputVertex)
      .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o2"))
      .setOutputVertexName(outputVertex)
      .setDataMovementType(movementType)
      .setId(edgeId)
      .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
      .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
      .build();
}
use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.
the class TestVertexImpl method createDAGPlanWithRunningInitializer.
/**
 * Builds the "DagWithInputInitializer2" plan: single-task vertex1 feeds 20-task vertex2 over
 * BROADCAST edge e1, and vertex2 additionally declares root input "input1" whose controller
 * class name is "IrrelevantInitializerClassName".
 *
 * @return the assembled plan
 */
private DAGPlan createDAGPlanWithRunningInitializer() {
  LOG.info("Setting up dag plan with running input initializer");

  PlanTaskConfiguration upstreamTaskConfig = PlanTaskConfiguration.newBuilder()
      .setNumTasks(1)
      .setVirtualCores(4)
      .setMemoryMb(1024)
      .setJavaOpts("")
      .setTaskModule("x1.y1")
      .build();
  VertexPlan upstreamVertex = VertexPlan.newBuilder()
      .setName("vertex1")
      .setType(PlanVertexType.NORMAL)
      .setTaskConfig(upstreamTaskConfig)
      .addOutEdgeId("e1")
      .build();

  RootInputLeafOutputProto rootInput = RootInputLeafOutputProto.newBuilder()
      .setControllerDescriptor(TezEntityDescriptorProto.newBuilder().setClassName("IrrelevantInitializerClassName"))
      .setName("input1")
      .setIODescriptor(TezEntityDescriptorProto.newBuilder().setClassName("InputClazz").build())
      .build();
  PlanTaskConfiguration downstreamTaskConfig = PlanTaskConfiguration.newBuilder()
      .setNumTasks(20)
      .setVirtualCores(4)
      .setMemoryMb(1024)
      .setJavaOpts("")
      .setTaskModule("x2.y2")
      .build();
  VertexPlan downstreamVertex = VertexPlan.newBuilder()
      .setName("vertex2")
      .setType(PlanVertexType.NORMAL)
      .addInputs(rootInput)
      .setTaskConfig(downstreamTaskConfig)
      .addInEdgeId("e1")
      .build();

  EdgePlan broadcastEdge = EdgePlan.newBuilder()
      .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("v1_v2"))
      .setInputVertexName("vertex1")
      .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o2"))
      .setOutputVertexName("vertex2")
      .setDataMovementType(PlanEdgeDataMovementType.BROADCAST)
      .setId("e1")
      .setDataSourceType(PlanEdgeDataSourceType.PERSISTED)
      .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL)
      .build();

  return DAGPlan.newBuilder()
      .setName("DagWithInputInitializer2")
      .addVertex(upstreamVertex)
      .addVertex(downstreamVertex)
      .addEdge(broadcastEdge)
      .build();
}
use of org.apache.tez.dag.api.records.DAGProtos.DAGPlan in project tez by apache.
the class TestVertexImpl method createDAGPlanWithNonExistVertexManager.
/**
 * Builds a single-vertex plan whose vertex manager plugin is set to the class name
 * "non-exist-vertexmanager".
 *
 * @return the assembled plan
 */
private DAGPlan createDAGPlanWithNonExistVertexManager() {
  LOG.info("Setting up dag plan with non-exist VertexManager");

  PlanTaskConfiguration taskConfig = PlanTaskConfiguration.newBuilder()
      .setNumTasks(1)
      .setVirtualCores(4)
      .setMemoryMb(1024)
      .setJavaOpts("")
      .setTaskModule("x1.y1")
      .build();
  VertexPlan onlyVertex = VertexPlan.newBuilder()
      .setName("vertex1")
      .setType(PlanVertexType.NORMAL)
      .setTaskConfig(taskConfig)
      .setVertexManagerPlugin(TezEntityDescriptorProto.newBuilder().setClassName("non-exist-vertexmanager"))
      .build();

  return DAGPlan.newBuilder()
      .setName("initializerWith0Tasks")
      .addVertex(onlyVertex)
      .build();
}
Aggregations