Use of org.apache.tez.dag.api.Vertex.VertexExecutionContext in project tez by apache.
In class DAG, method createDag:
// create protobuf message describing DAG
@Private
public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCredentials, Map<String, LocalResource> tezJarResources, LocalResource binaryConfig, boolean tezLrsAsArchive, ServicePluginsDescriptor servicePluginsDescriptor, JavaOptsChecker javaOptsChecker) {
Deque<String> topologicalVertexStack = verify(true);
verifyLocalResources(tezConf);
DAGPlan.Builder dagBuilder = DAGPlan.newBuilder();
dagBuilder.setName(this.name);
if (this.callerContext != null) {
dagBuilder.setCallerContext(DagTypeConverters.convertCallerContextToProto(callerContext));
}
if (this.dagInfo != null && !this.dagInfo.isEmpty()) {
dagBuilder.setDagInfo(this.dagInfo);
}
// Setup default execution context.
VertexExecutionContext defaultContext = getDefaultExecutionContext();
verifyExecutionContext(defaultContext, servicePluginsDescriptor, "DAGDefault");
if (defaultContext != null) {
DAGProtos.VertexExecutionContextProto contextProto = DagTypeConverters.convertToProto(defaultContext);
dagBuilder.setDefaultExecutionContext(contextProto);
}
if (!vertexGroups.isEmpty()) {
for (VertexGroup av : vertexGroups) {
GroupInfo groupInfo = av.getGroupInfo();
PlanVertexGroupInfo.Builder groupBuilder = PlanVertexGroupInfo.newBuilder();
groupBuilder.setGroupName(groupInfo.getGroupName());
for (Vertex v : groupInfo.getMembers()) {
groupBuilder.addGroupMembers(v.getName());
}
groupBuilder.addAllOutputs(groupInfo.outputs);
for (Map.Entry<String, InputDescriptor> entry : groupInfo.edgeMergedInputs.entrySet()) {
groupBuilder.addEdgeMergedInputs(PlanGroupInputEdgeInfo.newBuilder().setDestVertexName(entry.getKey()).setMergedInput(DagTypeConverters.convertToDAGPlan(entry.getValue())));
}
dagBuilder.addVertexGroups(groupBuilder);
}
}
Credentials dagCredentials = new Credentials();
if (extraCredentials != null) {
dagCredentials.mergeAll(extraCredentials);
}
dagCredentials.mergeAll(credentials);
if (!commonTaskLocalFiles.isEmpty()) {
dagBuilder.addAllLocalResource(DagTypeConverters.convertToDAGPlan(commonTaskLocalFiles));
}
Preconditions.checkArgument(topologicalVertexStack.size() == vertices.size(), "size of topologicalVertexStack is:" + topologicalVertexStack.size() + " while size of vertices is:" + vertices.size() + ", make sure they are the same in order to sort the vertices");
while (!topologicalVertexStack.isEmpty()) {
Vertex vertex = vertices.get(topologicalVertexStack.pop());
// infer credentials, resources and parallelism from data source
Resource vertexTaskResource = vertex.getTaskResource();
if (vertexTaskResource == null) {
vertexTaskResource = Resource.newInstance(tezConf.getInt(TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB, TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB_DEFAULT), tezConf.getInt(TezConfiguration.TEZ_TASK_RESOURCE_CPU_VCORES, TezConfiguration.TEZ_TASK_RESOURCE_CPU_VCORES_DEFAULT));
}
Map<String, LocalResource> vertexLRs = Maps.newHashMap();
vertexLRs.putAll(vertex.getTaskLocalFiles());
List<DataSourceDescriptor> dataSources = vertex.getDataSources();
for (DataSourceDescriptor dataSource : dataSources) {
if (dataSource.getCredentials() != null) {
dagCredentials.addAll(dataSource.getCredentials());
}
if (dataSource.getAdditionalLocalFiles() != null) {
TezCommonUtils.addAdditionalLocalResources(dataSource.getAdditionalLocalFiles(), vertexLRs, "Vertex " + vertex.getName());
}
}
if (tezJarResources != null) {
TezCommonUtils.addAdditionalLocalResources(tezJarResources, vertexLRs, "Vertex " + vertex.getName());
}
if (binaryConfig != null) {
vertexLRs.put(TezConstants.TEZ_PB_BINARY_CONF_NAME, binaryConfig);
}
int vertexParallelism = vertex.getParallelism();
VertexLocationHint vertexLocationHint = vertex.getLocationHint();
if (dataSources.size() == 1) {
DataSourceDescriptor dataSource = dataSources.get(0);
if (vertexParallelism == -1 && dataSource.getNumberOfShards() > -1) {
vertexParallelism = dataSource.getNumberOfShards();
}
if (vertexLocationHint == null && dataSource.getLocationHint() != null) {
vertexLocationHint = dataSource.getLocationHint();
}
}
if (vertexParallelism == -1) {
Preconditions.checkState(vertexLocationHint == null, "Cannot specify vertex location hint without specifying vertex parallelism. Vertex: " + vertex.getName());
} else if (vertexLocationHint != null) {
Preconditions.checkState(vertexParallelism == vertexLocationHint.getTaskLocationHints().size(), "vertex task location hint must equal vertex parallelism. Vertex: " + vertex.getName());
}
for (DataSinkDescriptor dataSink : vertex.getDataSinks()) {
if (dataSink.getCredentials() != null) {
dagCredentials.addAll(dataSink.getCredentials());
}
}
VertexPlan.Builder vertexBuilder = VertexPlan.newBuilder();
vertexBuilder.setName(vertex.getName());
// vertex type is implicitly NORMAL until TEZ-46.
vertexBuilder.setType(PlanVertexType.NORMAL);
vertexBuilder.setProcessorDescriptor(DagTypeConverters.convertToDAGPlan(vertex.getProcessorDescriptor()));
// Vertex ExecutionContext setup
VertexExecutionContext execContext = vertex.getVertexExecutionContext();
verifyExecutionContext(execContext, servicePluginsDescriptor, vertex.getName());
if (execContext != null) {
DAGProtos.VertexExecutionContextProto contextProto = DagTypeConverters.convertToProto(execContext);
vertexBuilder.setExecutionContext(contextProto);
}
if (vertex.getInputs().size() > 0) {
for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> input : vertex.getInputs()) {
vertexBuilder.addInputs(DagTypeConverters.convertToDAGPlan(input));
}
}
if (vertex.getOutputs().size() > 0) {
for (RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor> output : vertex.getOutputs()) {
vertexBuilder.addOutputs(DagTypeConverters.convertToDAGPlan(output));
}
}
if (vertex.getConf() != null && vertex.getConf().size() > 0) {
ConfigurationProto.Builder confBuilder = ConfigurationProto.newBuilder();
TezUtils.populateConfProtoFromEntries(vertex.getConf().entrySet(), confBuilder);
vertexBuilder.setVertexConf(confBuilder);
}
// task config
PlanTaskConfiguration.Builder taskConfigBuilder = PlanTaskConfiguration.newBuilder();
taskConfigBuilder.setNumTasks(vertexParallelism);
taskConfigBuilder.setMemoryMb(vertexTaskResource.getMemory());
taskConfigBuilder.setVirtualCores(vertexTaskResource.getVirtualCores());
try {
taskConfigBuilder.setJavaOpts(TezClientUtils.addDefaultsToTaskLaunchCmdOpts(vertex.getTaskLaunchCmdOpts(), tezConf, javaOptsChecker));
} catch (TezException e) {
throw new TezUncheckedException("Invalid TaskLaunchCmdOpts defined for Vertex " + vertex.getName() + " : " + e.getMessage(), e);
}
taskConfigBuilder.setTaskModule(vertex.getName());
if (!vertexLRs.isEmpty()) {
taskConfigBuilder.addAllLocalResource(DagTypeConverters.convertToDAGPlan(vertexLRs));
}
Map<String, String> taskEnv = Maps.newHashMap(vertex.getTaskEnvironment());
TezYARNUtils.setupDefaultEnv(taskEnv, tezConf, TezConfiguration.TEZ_TASK_LAUNCH_ENV, TezConfiguration.TEZ_TASK_LAUNCH_ENV_DEFAULT, TezConfiguration.TEZ_TASK_LAUNCH_CLUSTER_DEFAULT_ENV, TezConfiguration.TEZ_TASK_LAUNCH_CLUSTER_DEFAULT_ENV_DEFAULT, tezLrsAsArchive);
for (Map.Entry<String, String> entry : taskEnv.entrySet()) {
PlanKeyValuePair.Builder envSettingBuilder = PlanKeyValuePair.newBuilder();
envSettingBuilder.setKey(entry.getKey());
envSettingBuilder.setValue(entry.getValue());
taskConfigBuilder.addEnvironmentSetting(envSettingBuilder);
}
if (vertexLocationHint != null) {
if (vertexLocationHint.getTaskLocationHints() != null) {
for (TaskLocationHint hint : vertexLocationHint.getTaskLocationHints()) {
PlanTaskLocationHint.Builder taskLocationHintBuilder = PlanTaskLocationHint.newBuilder();
// we can allow this later on if needed
if (hint.getAffinitizedTask() != null) {
throw new TezUncheckedException("Task based affinity may not be specified via the DAG API");
}
if (hint.getHosts() != null) {
taskLocationHintBuilder.addAllHost(hint.getHosts());
}
if (hint.getRacks() != null) {
taskLocationHintBuilder.addAllRack(hint.getRacks());
}
vertexBuilder.addTaskLocationHint(taskLocationHintBuilder);
}
}
}
if (vertex.getVertexManagerPlugin() != null) {
vertexBuilder.setVertexManagerPlugin(DagTypeConverters.convertToDAGPlan(vertex.getVertexManagerPlugin()));
}
for (Edge inEdge : vertex.getInputEdges()) {
vertexBuilder.addInEdgeId(inEdge.getId());
}
for (Edge outEdge : vertex.getOutputEdges()) {
vertexBuilder.addOutEdgeId(outEdge.getId());
}
vertexBuilder.setTaskConfig(taskConfigBuilder);
dagBuilder.addVertex(vertexBuilder);
}
for (Edge edge : edges) {
EdgePlan.Builder edgeBuilder = EdgePlan.newBuilder();
edgeBuilder.setId(edge.getId());
edgeBuilder.setInputVertexName(edge.getInputVertex().getName());
edgeBuilder.setOutputVertexName(edge.getOutputVertex().getName());
edgeBuilder.setDataMovementType(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getDataMovementType()));
edgeBuilder.setDataSourceType(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getDataSourceType()));
edgeBuilder.setSchedulingType(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getSchedulingType()));
edgeBuilder.setEdgeSource(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getEdgeSource()));
edgeBuilder.setEdgeDestination(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getEdgeDestination()));
if (edge.getEdgeProperty().getDataMovementType() == DataMovementType.CUSTOM) {
if (edge.getEdgeProperty().getEdgeManagerDescriptor() != null) {
edgeBuilder.setEdgeManager(DagTypeConverters.convertToDAGPlan(edge.getEdgeProperty().getEdgeManagerDescriptor()));
}
// else the AM will deal with this.
}
dagBuilder.addEdge(edgeBuilder);
}
if (dagAccessControls != null) {
dagBuilder.setAclInfo(DagTypeConverters.convertDAGAccessControlsToProto(dagAccessControls));
}
ConfigurationProto.Builder confProtoBuilder = ConfigurationProto.newBuilder();
if (!this.dagConf.isEmpty()) {
TezUtils.populateConfProtoFromEntries(this.dagConf.entrySet(), confProtoBuilder);
}
// Copy historyLogLevel from tezConf into dagConf if it's not overridden in dagConf.
String logLevel = this.dagConf.get(TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL);
if (logLevel != null) {
// Log level is already set in dagConf; just validate it.
if (!HistoryLogLevel.validateLogLevel(logLevel)) {
throw new IllegalArgumentException("Config: " + TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL + " is set to invalid value: " + logLevel);
}
} else {
// Validate and set value from tezConf.
logLevel = tezConf.get(TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL);
if (logLevel != null) {
if (!HistoryLogLevel.validateLogLevel(logLevel)) {
throw new IllegalArgumentException("Config: " + TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL + " is set to invalid value: " + logLevel);
}
PlanKeyValuePair.Builder kvp = PlanKeyValuePair.newBuilder();
kvp.setKey(TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL);
kvp.setValue(logLevel);
confProtoBuilder.addConfKeyValues(kvp);
}
}
dagBuilder.setDagConf(confProtoBuilder);
if (dagCredentials != null) {
dagBuilder.setCredentialsBinary(DagTypeConverters.convertCredentialsToProto(dagCredentials));
TezCommonUtils.logCredentials(LOG, dagCredentials, "dag");
}
return dagBuilder.build();
}
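For orientation, here is a minimal client-side sketch of the calls that feed the serialization path above. It is a sketch under assumptions: the plugin names ("myScheduler", "myLauncher", "myTaskComm") and the processor class are hypothetical placeholders, and the names would have to match plugins registered through a ServicePluginsDescriptor.
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.Vertex.VertexExecutionContext;

DAG dag = DAG.create("exampleDag");
// Serialized by createDag() via dagBuilder.setDefaultExecutionContext(...).
// "myScheduler"/"myLauncher"/"myTaskComm" are assumed placeholder names.
dag.setExecutionContext(
    VertexExecutionContext.create("myScheduler", "myLauncher", "myTaskComm"));
Vertex v = Vertex.create("v1", ProcessorDescriptor.create("com.example.Proc"), 1);
// A per-vertex context is serialized via vertexBuilder.setExecutionContext(...)
// and takes precedence over the DAG-level default.
v.setExecutionContext(VertexExecutionContext.createExecuteInAm(true));
dag.addVertex(v);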
Use of org.apache.tez.dag.api.Vertex.VertexExecutionContext in project hive by apache.
In class DagUtils, method createVertex:
/**
* Create a vertex from a given work object.
*
* @param conf JobConf to be used for this execution unit
* @param workUnit The instance of BaseWork representing the actual work to be performed
* by this vertex.
* @param scratchDir HDFS scratch dir for this execution unit.
* @return Vertex
*/
@SuppressWarnings("deprecation")
public Vertex createVertex(JobConf conf, BaseWork workUnit, Path scratchDir, TezWork tezWork, Map<String, LocalResource> localResources) throws Exception {
Vertex vertex;
// simply dispatch the call to the right method for the actual (sub-) type of
// BaseWork.
VertexType vertexType = tezWork.getVertexType(workUnit);
if (workUnit instanceof MapWork) {
vertex = createVertexFromMapWork(conf, (MapWork) workUnit, scratchDir, vertexType);
} else if (workUnit instanceof ReduceWork) {
vertex = createVertexFromReduceWork(conf, (ReduceWork) workUnit, scratchDir);
} else if (workUnit instanceof MergeJoinWork) {
vertex = createVertexFromMergeWork(conf, (MergeJoinWork) workUnit, scratchDir, vertexType);
// set the VertexManagerPlugin if this is a cross product destination vertex
List<String> crossProductSources = new ArrayList<>();
for (BaseWork parentWork : tezWork.getParents(workUnit)) {
if (tezWork.getEdgeType(parentWork, workUnit) == EdgeType.XPROD_EDGE) {
crossProductSources.add(parentWork.getName());
}
}
if (!crossProductSources.isEmpty()) {
CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources);
vertex.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName()).setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf))));
// parallelism shouldn't be set for cartesian product vertex
}
} else {
// something is seriously wrong if this is happening
throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
}
VertexExecutionContext vertexExecutionContext = createVertexExecutionContext(workUnit);
vertex.addTaskLocalFiles(localResources);
vertex.setTaskLaunchCmdOpts(getContainerJavaOpts(conf));
vertex.setExecutionContext(vertexExecutionContext);
// initialize stats publisher if necessary
if (workUnit.isGatheringStats()) {
StatsPublisher statsPublisher;
StatsFactory factory = StatsFactory.newFactory(conf);
if (factory != null) {
StatsCollectionContext sCntxt = new StatsCollectionContext(conf);
sCntxt.setStatsTmpDirs(Utilities.getStatsTmpDirs(workUnit, conf));
statsPublisher = factory.getStatsPublisher();
if (!statsPublisher.init(sCntxt)) {
// init creates the stats table if it does not exist; failure is fatal only when reliable stats are required
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
}
}
}
}
final Class outputKlass;
if (HiveOutputFormatImpl.class.getName().equals(conf.get("mapred.output.format.class"))) {
// Hive uses this output format when it writes all of its data through the FS operator
outputKlass = NullMROutput.class;
} else {
outputKlass = MROutput.class;
}
// If there is a FileSink, add a DataSink to the vertex
boolean hasFileSink = workUnit.getAllOperators().stream().anyMatch(o -> o instanceof FileSinkOperator);
// final vertices need to have at least one output
boolean endVertex = tezWork.getLeaves().contains(workUnit);
if (endVertex || hasFileSink) {
OutputCommitterDescriptor ocd = null;
String committer = HiveConf.getVar(conf, ConfVars.TEZ_MAPREDUCE_OUTPUT_COMMITTER);
if (committer != null && !committer.isEmpty()) {
ocd = OutputCommitterDescriptor.create(committer);
}
vertex.addDataSink("out_" + workUnit.getName(), new DataSinkDescriptor(OutputDescriptor.create(outputKlass.getName()).setUserPayload(vertex.getProcessorDescriptor().getUserPayload()), ocd, null));
}
return vertex;
}
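The createVertexExecutionContext(workUnit) helper called above is not part of this snippet. As a hedged reconstruction (the method body, the getLlapMode()/getUberMode() accessors, and the "llap" service name are assumptions for illustration, not verified Hive source), such a helper might dispatch on the work unit's execution mode:
// Sketch of a possible createVertexExecutionContext helper.
private VertexExecutionContext createVertexExecutionContext(BaseWork work) {
  // Default: run the vertex's tasks in ordinary YARN containers.
  VertexExecutionContext context = VertexExecutionContext.createExecuteInContainers(true);
  if (work.getLlapMode()) {
    // Assumed service name: route scheduling, container launching, and task
    // communication through LLAP's service plugins.
    context = VertexExecutionContext.create("llap", "llap", "llap");
  }
  if (work.getUberMode()) {
    // Run the tasks inside the Tez AM itself.
    context = VertexExecutionContext.createExecuteInAm(true);
  }
  return context;
}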
Use of org.apache.tez.dag.api.Vertex.VertexExecutionContext in project hive by apache.
In class DagUtils, method createVertex:
/*
* Helper function to create Vertex from MapWork.
*/
private Vertex createVertex(JobConf conf, MapWork mapWork, LocalResource appJarLr, List<LocalResource> additionalLr, FileSystem fs, Path mrScratchDir, Context ctx, VertexType vertexType) throws Exception {
Path tezDir = getTezDir(mrScratchDir);
// set up the operator plan
Utilities.cacheMapWork(conf, mapWork, mrScratchDir);
// create the directories FileSinkOperators need
Utilities.createTmpDirs(conf, mapWork);
// finally create the vertex
Vertex map = null;
// use tez to combine splits
boolean groupSplitsInInputInitializer;
DataSourceDescriptor dataSource;
int numTasks = -1;
@SuppressWarnings("rawtypes") Class inputFormatClass = conf.getClass("mapred.input.format.class", InputFormat.class);
boolean vertexHasCustomInput = VertexType.isCustomInputType(vertexType);
LOG.info("Vertex has custom input? " + vertexHasCustomInput);
if (vertexHasCustomInput) {
groupSplitsInInputInitializer = false;
// grouping happens in execution phase. The input payload should not enable grouping here,
// it will be enabled in the CustomVertex.
inputFormatClass = HiveInputFormat.class;
conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
// mapreduce.tez.input.initializer.serialize.event.payload should be set to false when using
// this plug-in to avoid getting a serialized event at run-time.
conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
} else {
// is HiveInputFormat
if (inputFormatClass == HiveInputFormat.class) {
groupSplitsInInputInitializer = true;
} else {
groupSplitsInInputInitializer = false;
}
}
if (mapWork instanceof MergeFileWork) {
Path outputPath = ((MergeFileWork) mapWork).getOutputDir();
// prepare the tmp output directory. The output tmp directory should
// exist before jobClose (before renaming after job completion)
Path tempOutPath = Utilities.toTempPath(outputPath);
try {
FileSystem tmpOutFS = tempOutPath.getFileSystem(conf);
if (!tmpOutFS.exists(tempOutPath)) {
tmpOutFS.mkdirs(tempOutPath);
}
} catch (IOException e) {
throw new RuntimeException("Can't make path " + outputPath + " : " + e.getMessage(), e);
}
}
// remember mapping of plan to input
conf.set(Utilities.INPUT_NAME, mapWork.getName());
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) {
// set up the operator plan. (before setting up splits on the AM)
Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
// pick the correct input initializer plugin.
if (groupSplitsInInputInitializer) {
// Not setting a payload, since the MRInput payload is the same and can be accessed.
InputInitializerDescriptor descriptor = InputInitializerDescriptor.create(HiveSplitGenerator.class.getName());
dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(true).setCustomInitializerDescriptor(descriptor).build();
} else {
// Not HiveInputFormat, or a custom VertexManager will take care of grouping splits
if (vertexHasCustomInput) {
dataSource = MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
} else {
dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
}
}
} else {
// Setup client side split generation.
// we need to set this, because with HS2 and client side split
// generation we end up not finding the map work. This is
// because of thread local madness (tez split generation is
// multi-threaded - HS2 plan cache uses thread locals). Setting
// VECTOR_MODE/USE_VECTORIZED_INPUT_FILE_FORMAT causes the split gen code to use the conf instead
// of the map work.
conf.setBoolean(Utilities.VECTOR_MODE, mapWork.getVectorMode());
conf.setBoolean(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, mapWork.getUseVectorizedInputFileFormat());
dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(conf, new Path(tezDir, "split_" + mapWork.getName().replaceAll(" ", "_")), true);
numTasks = dataSource.getNumberOfShards();
// set up the operator plan. (after generating splits - that changes configs)
Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
}
UserPayload serializedConf = TezUtils.createUserPayloadFromConf(conf);
String procClassName = MapTezProcessor.class.getName();
if (mapWork instanceof MergeFileWork) {
procClassName = MergeFileTezProcessor.class.getName();
}
VertexExecutionContext executionContext = createVertexExecutionContext(mapWork);
map = Vertex.create(mapWork.getName(), ProcessorDescriptor.create(procClassName).setUserPayload(serializedConf), numTasks, getContainerResource(conf));
map.setTaskEnvironment(getContainerEnvironment(conf, true));
map.setExecutionContext(executionContext);
map.setTaskLaunchCmdOpts(getContainerJavaOpts(conf));
assert mapWork.getAliasToWork().keySet().size() == 1;
// Add the actual source input
String alias = mapWork.getAliasToWork().keySet().iterator().next();
map.addDataSource(alias, dataSource);
Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
localResources.put(getBaseName(appJarLr), appJarLr);
for (LocalResource lr : additionalLr) {
localResources.put(getBaseName(lr), lr);
}
map.addTaskLocalFiles(localResources);
return map;
}
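Note that numTasks stays -1 on the AM-side split-generation branch: Tez's createDag() (first snippet) later infers the parallelism of a vertex with exactly one data source from that source's shard count. A minimal sketch of that contract, assuming dataSource comes from one of the builders above and using a placeholder processor class:
// Parallelism -1 defers the task count to the data source / initializer.
Vertex v = Vertex.create("map_1",
    ProcessorDescriptor.create("com.example.MapProcessor"), -1);
v.addDataSource("alias", dataSource);
// In createDag(): if the vertex still has parallelism -1 and its single
// data source reports getNumberOfShards() > -1, that shard count becomes
// the vertex parallelism; otherwise the input initializer sets it at runtime.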
Use of org.apache.tez.dag.api.Vertex.VertexExecutionContext in project hive by apache.
In class DagUtils, method createVertex:
/*
* Helper function to create Vertex from MapWork.
*/
private Vertex createVertex(JobConf conf, MapWork mapWork, FileSystem fs, Path mrScratchDir, Context ctx, VertexType vertexType, Map<String, LocalResource> localResources) throws Exception {
// set up the operator plan
Utilities.cacheMapWork(conf, mapWork, mrScratchDir);
// create the directories FileSinkOperators need
Utilities.createTmpDirs(conf, mapWork);
// finally create the vertex
Vertex map = null;
// use tez to combine splits
boolean groupSplitsInInputInitializer;
DataSourceDescriptor dataSource;
int numTasks = -1;
@SuppressWarnings("rawtypes") Class inputFormatClass = conf.getClass("mapred.input.format.class", InputFormat.class);
boolean vertexHasCustomInput = VertexType.isCustomInputType(vertexType);
LOG.info("Vertex has custom input? " + vertexHasCustomInput);
if (vertexHasCustomInput) {
groupSplitsInInputInitializer = false;
// grouping happens in execution phase. The input payload should not enable grouping here,
// it will be enabled in the CustomVertex.
inputFormatClass = HiveInputFormat.class;
conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
// mapreduce.tez.input.initializer.serialize.event.payload should be set to false when using
// this plug-in to avoid getting a serialized event at run-time.
conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
} else {
// is HiveInputFormat
if (inputFormatClass == HiveInputFormat.class) {
groupSplitsInInputInitializer = true;
} else {
groupSplitsInInputInitializer = false;
}
}
if (mapWork instanceof MergeFileWork) {
Path outputPath = ((MergeFileWork) mapWork).getOutputDir();
// prepare the tmp output directory. The output tmp directory should
// exist before jobClose (before renaming after job completion)
Path tempOutPath = Utilities.toTempPath(outputPath);
try {
FileSystem tmpOutFS = tempOutPath.getFileSystem(conf);
if (!tmpOutFS.exists(tempOutPath)) {
tmpOutFS.mkdirs(tempOutPath);
}
} catch (IOException e) {
throw new RuntimeException("Can't make path " + outputPath + " : " + e.getMessage(), e);
}
}
// remember mapping of plan to input
conf.set(Utilities.INPUT_NAME, mapWork.getName());
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) {
// set up the operator plan. (before setting up splits on the AM)
Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
// pick the correct input initializer plugin.
if (groupSplitsInInputInitializer) {
// Not setting a payload, since the MRInput payload is the same and can be accessed.
InputInitializerDescriptor descriptor = InputInitializerDescriptor.create(HiveSplitGenerator.class.getName());
dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(true).setCustomInitializerDescriptor(descriptor).build();
} else {
// Not HiveInputFormat, or a custom VertexManager will take care of grouping splits
if (vertexHasCustomInput && vertexType == VertexType.MULTI_INPUT_UNINITIALIZED_EDGES) {
// SMB Join.
dataSource = MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
} else {
dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
}
}
} else {
// Setup client side split generation.
// we need to set this, because with HS2 and client side split
// generation we end up not finding the map work. This is
// because of thread local madness (tez split generation is
// multi-threaded - HS2 plan cache uses thread locals). Setting
// VECTOR_MODE/USE_VECTORIZED_INPUT_FILE_FORMAT causes the split gen code to use the conf instead
// of the map work.
conf.setBoolean(Utilities.VECTOR_MODE, mapWork.getVectorMode());
conf.setBoolean(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, mapWork.getUseVectorizedInputFileFormat());
InputSplitInfo inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(conf, false, 0);
InputInitializerDescriptor descriptor = InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName());
InputDescriptor inputDescriptor = InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(conf)).setSplits(inputSplitInfo.getSplitsProto()).build().toByteString().asReadOnlyByteBuffer()));
dataSource = DataSourceDescriptor.create(inputDescriptor, descriptor, null);
numTasks = inputSplitInfo.getNumTasks();
// set up the operator plan. (after generating splits - that changes configs)
Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
}
UserPayload serializedConf = TezUtils.createUserPayloadFromConf(conf);
String procClassName = MapTezProcessor.class.getName();
if (mapWork instanceof MergeFileWork) {
procClassName = MergeFileTezProcessor.class.getName();
}
VertexExecutionContext executionContext = createVertexExecutionContext(mapWork);
map = Vertex.create(mapWork.getName(), ProcessorDescriptor.create(procClassName).setUserPayload(serializedConf), numTasks, getContainerResource(conf));
map.setTaskEnvironment(getContainerEnvironment(conf, true));
map.setExecutionContext(executionContext);
map.setTaskLaunchCmdOpts(getContainerJavaOpts(conf));
assert mapWork.getAliasToWork().keySet().size() == 1;
// Add the actual source input
String alias = mapWork.getAliasToWork().keySet().iterator().next();
map.addDataSource(alias, dataSource);
map.addTaskLocalFiles(localResources);
return map;
}
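Either variant returns a fully configured Vertex that the caller wires into a DAG. A minimal sketch of that wiring, assuming mapVertex and reduceVertex come from createVertex(...) and using placeholder key/value classes (the ordered partitioned shuffle edge is one common choice, not the only one):
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig;

DAG dag = DAG.create("query_1");
dag.addVertex(mapVertex);
dag.addVertex(reduceVertex);
// A typical ordered, partitioned shuffle edge between map and reduce.
OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
    .newBuilder("org.apache.hadoop.io.Text", "org.apache.hadoop.io.IntWritable",
        "org.apache.hadoop.mapreduce.lib.partition.HashPartitioner")
    .build();
dag.addEdge(Edge.create(mapVertex, reduceVertex,
    edgeConf.createDefaultEdgeProperty()));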
Use of org.apache.tez.dag.api.Vertex.VertexExecutionContext in project tez by apache.
In class TestVertexImpl2, method testDefaultExecContextViaDag:
@Test(timeout = 5000)
public void testDefaultExecContextViaDag() {
VertexExecutionContext defaultExecContext = VertexExecutionContext.create(ExecutionContextTestInfoHolder.append(ExecutionContextTestInfoHolder.TASK_SCHEDULER_NAME_BASE, 0), ExecutionContextTestInfoHolder.append(ExecutionContextTestInfoHolder.CONTAINER_LAUNCHER_NAME_BASE, 2), ExecutionContextTestInfoHolder.append(ExecutionContextTestInfoHolder.TASK_COMM_NAME_BASE, 2));
ExecutionContextTestInfoHolder info = new ExecutionContextTestInfoHolder(null, defaultExecContext, 3);
VertexWrapper vertexWrapper = createVertexWrapperForExecutionContextTest(info);
assertEquals(0, vertexWrapper.vertex.taskSchedulerIdentifier);
assertEquals(2, vertexWrapper.vertex.containerLauncherIdentifier);
assertEquals(2, vertexWrapper.vertex.taskCommunicatorIdentifier);
}
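The named contexts in this test resolve only because the test harness registers matching plugin sets. Outside a test, a hedged sketch of the equivalent setup (the plugin implementation class names are placeholders) would pass a ServicePluginsDescriptor to the Tez client; this is the same descriptor that DAG.createDag() validates execution contexts against:
import org.apache.tez.serviceplugins.api.ContainerLauncherDescriptor;
import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor;
import org.apache.tez.serviceplugins.api.TaskCommunicatorDescriptor;
import org.apache.tez.serviceplugins.api.TaskSchedulerDescriptor;

ServicePluginsDescriptor spd = ServicePluginsDescriptor.create(
    new TaskSchedulerDescriptor[] {
        TaskSchedulerDescriptor.create("scheduler0", "com.example.MyScheduler") },
    new ContainerLauncherDescriptor[] {
        ContainerLauncherDescriptor.create("launcher0", "com.example.MyLauncher") },
    new TaskCommunicatorDescriptor[] {
        TaskCommunicatorDescriptor.create("comm0", "com.example.MyTaskComm") });
// An execution context referencing the registered plugin names.
VertexExecutionContext ctx =
    VertexExecutionContext.create("scheduler0", "launcher0", "comm0");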