use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.
the class DagUtils method createEdge.
/**
 * Given a Vertex group and a vertex createEdge will create an
 * Edge between them.
 *
 * <p>The merged-input class placed on the edge depends on the edge type:
 * broadcast, custom and custom-simple edges use
 * {@code ConcatenatedMergedKeyValueInput}; every other type uses
 * {@code TezMergedLogicalInput}. For a custom edge the child vertex is also
 * given a {@code CustomPartitionVertex} manager plugin whose payload is the
 * serialized {@code CustomVertexConfiguration}.
 *
 * @param group The parent VertexGroup
 * @param vConf The job conf of one of the parent (grouped) vertices
 * @param w The child vertex
 * @param edgeProp the edge property of connection between the two
 * endpoints.
 * @param vertexType the vertex type stored in the custom vertex
 * configuration; only read for {@code CUSTOM_EDGE}
 * @return the group input edge connecting {@code group} to {@code w}
 * @throws IOException declared by this method; presumably raised while
 * serializing the custom vertex configuration or by the helper calls
 */
@SuppressWarnings("rawtypes")
public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w, TezEdgeProperty edgeProp, VertexType vertexType) throws IOException {
  Class mergeInputClass;
  LOG.info("Creating Edge between " + group.getGroupName() + " and " + w.getName());
  EdgeType edgeType = edgeProp.getEdgeType();
  switch (edgeType) {
    case BROADCAST_EDGE:
      mergeInputClass = ConcatenatedMergedKeyValueInput.class;
      break;
    case CUSTOM_EDGE: {
      mergeInputClass = ConcatenatedMergedKeyValueInput.class;
      int numBuckets = edgeProp.getNumBuckets();
      CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(numBuckets, vertexType);
      DataOutputBuffer dob = new DataOutputBuffer();
      vertexConf.write(dob);
      VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
      // getData() exposes the whole backing array; only the first getLength()
      // bytes were written, so restrict the payload to that prefix instead of
      // shipping the unused tail of the buffer.
      ByteBuffer userPayload = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
      desc.setUserPayload(UserPayload.create(userPayload));
      w.setVertexManagerPlugin(desc);
      break;
    }
    case CUSTOM_SIMPLE_EDGE:
      mergeInputClass = ConcatenatedMergedKeyValueInput.class;
      break;
    case SIMPLE_EDGE:
      setupAutoReducerParallelism(edgeProp, w);
      // fall through: simple edges use the same merged input as the default case
    default:
      mergeInputClass = TezMergedLogicalInput.class;
      break;
  }
  return GroupInputEdge.create(group, w, createEdgeProperty(edgeProp, vConf), InputDescriptor.create(mergeInputClass.getName()));
}
use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project tez by apache.
the class VertexImpl method assignVertexManager.
/**
 * Selects and installs the {@code VertexManager} for this vertex.
 *
 * <p>Selection order:
 * <ol>
 *   <li>If recovery data says initialization should be skipped, a
 *       {@code NoOpVertexManager} is installed whose payload is the
 *       serialized configuration-done event from the previous attempt.</li>
 *   <li>If the vertex plan carries a user vertex manager plugin, that plugin
 *       is used.</li>
 *   <li>Otherwise: {@code RootInputVertexManager} when there are inputs with
 *       initializers; {@code InputReadyVertexManager} when there is a
 *       one-to-one source edge and no custom edge;
 *       {@code ShuffleVertexManager} when there is a scatter-gather source
 *       edge and no custom edge; {@code ImmediateStartVertexManager} in all
 *       remaining cases.</li>
 * </ol>
 *
 * @throws TezException declared by this method
 * @throws TezUncheckedException if the configuration-done event cannot be
 * serialized, or if a source edge has an unknown data movement type
 */
private void assignVertexManager() throws TezException {
  // VertexReconfigureDoneEvent will be logged
  if (recoveryData != null && recoveryData.shouldSkipInit()) {
    // Replace the original VertexManager with NoOpVertexManager if the reconfiguration is done in the last AM attempt
    VertexConfigurationDoneEvent reconfigureDoneEvent = recoveryData.getVertexConfigurationDoneEvent();
    if (LOG.isInfoEnabled()) {
      LOG.info("VertexManager reconfiguration is done in the last AM Attempt" + ", use NoOpVertexManager to replace it, vertexId=" + logIdentifier);
      LOG.info("VertexReconfigureDoneEvent=" + reconfigureDoneEvent);
    }
    NonSyncByteArrayOutputStream out = new NonSyncByteArrayOutputStream();
    try {
      reconfigureDoneEvent.toProtoStream(out);
    } catch (IOException e) {
      // This step writes (serializes) the event; keep the cause so the
      // underlying I/O failure is not lost.
      throw new TezUncheckedException("Unable to serialize VertexReconfigureDoneEvent", e);
    }
    this.vertexManager = new VertexManager(VertexManagerPluginDescriptor.create(NoOpVertexManager.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(out.toByteArray()))), dagUgi, this, appContext, stateChangeNotifier);
    return;
  }

  // Classify the incoming edges; the flags drive the default manager choice below.
  boolean hasBipartite = false;
  boolean hasOneToOne = false;
  boolean hasCustom = false;
  if (sourceVertices != null) {
    for (Edge edge : sourceVertices.values()) {
      switch (edge.getEdgeProperty().getDataMovementType()) {
        case SCATTER_GATHER:
          hasBipartite = true;
          break;
        case ONE_TO_ONE:
          hasOneToOne = true;
          break;
        case BROADCAST:
          // broadcast edges do not influence the choice
          break;
        case CUSTOM:
          hasCustom = true;
          break;
        default:
          throw new TezUncheckedException("Unknown data movement type: " + edge.getEdgeProperty().getDataMovementType());
      }
    }
  }

  boolean hasUserVertexManager = vertexPlan.hasVertexManagerPlugin();
  if (hasUserVertexManager) {
    // A plugin configured in the DAG plan always wins over the defaults.
    VertexManagerPluginDescriptor pluginDesc = DagTypeConverters.convertVertexManagerPluginDescriptorFromDAGPlan(vertexPlan.getVertexManagerPlugin());
    LOG.info("Setting user vertex manager plugin: " + pluginDesc.getClassName() + " on vertex: " + getLogIdentifier());
    vertexManager = new VertexManager(pluginDesc, dagUgi, this, appContext, stateChangeNotifier);
  } else {
    // Else we use the default ImmediateStartVertexManager
    if (inputsWithInitializers != null) {
      LOG.info("Setting vertexManager to RootInputVertexManager for " + logIdentifier);
      vertexManager = new VertexManager(RootInputVertexManager.createConfigBuilder(vertexConf).build(), dagUgi, this, appContext, stateChangeNotifier);
    } else if (hasOneToOne && !hasCustom) {
      LOG.info("Setting vertexManager to InputReadyVertexManager for " + logIdentifier);
      vertexManager = new VertexManager(VertexManagerPluginDescriptor.create(InputReadyVertexManager.class.getName()), dagUgi, this, appContext, stateChangeNotifier);
    } else if (hasBipartite && !hasCustom) {
      LOG.info("Setting vertexManager to ShuffleVertexManager for " + logIdentifier);
      // shuffle vertex manager needs a conf payload
      vertexManager = new VertexManager(ShuffleVertexManager.createConfigBuilder(vertexConf).build(), dagUgi, this, appContext, stateChangeNotifier);
    } else {
      // schedule all tasks upon vertex start. Default behavior.
      LOG.info("Setting vertexManager to ImmediateStartVertexManager for " + logIdentifier);
      vertexManager = new VertexManager(VertexManagerPluginDescriptor.create(ImmediateStartVertexManager.class.getName()), dagUgi, this, appContext, stateChangeNotifier);
    }
  }
}
use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.
the class DagUtils method setupQuickStart.
/**
 * Installs a {@code ShuffleVertexManager} plugin on {@code v} with both the
 * minimum and maximum source fraction set to 0, unless the edge is marked as
 * slow-start, in which case the vertex is left untouched.
 *
 * @param edgeProp edge property consulted for the slow-start flag
 * @param v vertex that receives the vertex manager plugin
 * @throws IOException declared by this method; presumably raised while
 * turning the configuration into a user payload
 */
private void setupQuickStart(TezEdgeProperty edgeProp, Vertex v) throws IOException {
  if (edgeProp.isSlowStart()) {
    // Slow-start edges keep whatever manager the vertex already has.
    return;
  }

  // Start from an empty configuration (no defaults loaded) holding only the two fractions.
  Configuration shuffleConf = new Configuration(false);
  shuffleConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, 0);
  shuffleConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, 0);

  VertexManagerPluginDescriptor managerDesc = VertexManagerPluginDescriptor.create(ShuffleVertexManager.class.getName());
  managerDesc.setUserPayload(TezUtils.createUserPayloadFromConf(shuffleConf));
  v.setVertexManagerPlugin(managerDesc);
}
use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.
the class DagUtils method createVertex.
/**
 * Creates the vertex for a merge-join work unit.
 *
 * <p>When the main work is a {@code MapWork}, the vertex is built from that
 * map work, one {@code MultiMRInput} data source is added per entry of the
 * base work list, and a {@code CustomPartitionVertex} manager plugin carrying
 * the serialized {@code CustomVertexConfiguration} is attached. Otherwise the
 * main work is treated as a {@code ReduceWork} and the call is delegated to
 * the reduce-work overload.
 *
 * <p>Note that {@code conf} is modified in place (input format class, event
 * payload flag, and per-work input name / merge file prefix).
 *
 * @param conf job configuration; mutated by this method
 * @param mergeJoinWork the merge-join work to build a vertex for
 * @param appJarLr passed through to the delegated {@code createVertex} call
 * @param additionalLr passed through to the delegated {@code createVertex} call
 * @param fs passed through to the delegated {@code createVertex} call
 * @param mrScratchDir scratch directory used when registering the merge work
 * @param ctx passed through to the delegated {@code createVertex} call
 * @param vertexType vertex type stored in the custom vertex configuration
 * @return the vertex created for the merge-join work
 * @throws Exception declared by this method
 */
private Vertex createVertex(JobConf conf, MergeJoinWork mergeJoinWork, LocalResource appJarLr, List<LocalResource> additionalLr, FileSystem fs, Path mrScratchDir, Context ctx, VertexType vertexType) throws Exception {
  Utilities.setMergeWork(conf, mergeJoinWork, mrScratchDir, false);
  if (mergeJoinWork.getMainWork() instanceof MapWork) {
    List<BaseWork> mapWorkList = mergeJoinWork.getBaseWorkList();
    MapWork mainMapWork = (MapWork) (mergeJoinWork.getMainWork());
    Vertex mergeVx = createVertex(conf, mainMapWork, appJarLr, additionalLr, fs, mrScratchDir, ctx, vertexType);
    conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
    // mapreduce.tez.input.initializer.serialize.event.payload should be set
    // to false when using this plug-in to avoid getting a serialized event at run-time.
    conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
    for (BaseWork work : mapWorkList) {
      MapWork mapWork = (MapWork) work;
      // Each data source is built from conf as it stands for this particular work.
      conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName());
      conf.set(Utilities.INPUT_NAME, mapWork.getName());
      LOG.info("Going through each work and adding MultiMRInput");
      mergeVx.addDataSource(mapWork.getName(), MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
    }
    VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
    // the +1 to the size is because of the main work.
    CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf().getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(), mapWorkList.size() + 1);
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    // getData() exposes the whole backing array; only the first getLength()
    // bytes were written, so restrict the payload to that prefix.
    desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength())));
    mergeVx.setVertexManagerPlugin(desc);
    return mergeVx;
  } else {
    return createVertex(conf, (ReduceWork) mergeJoinWork.getMainWork(), appJarLr, additionalLr, fs, mrScratchDir, ctx);
  }
}
use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.
the class DagUtils method createVertex.
/**
 * Creates the vertex for a merge-join work unit.
 *
 * <p>When the main work is a {@code MapWork}, the vertex is built from that
 * map work, one {@code MultiMRInput} data source is added per entry of the
 * base work list, and a {@code CustomPartitionVertex} manager plugin carrying
 * the serialized {@code CustomVertexConfiguration} is attached. For what the
 * code treats as an SMB join (the merge-join operator has exactly one parent
 * operator and non-null op traits) the configuration also carries a map from
 * input name to bucket count for each base work. Otherwise the main work is
 * treated as a {@code ReduceWork} and the call is delegated to the
 * reduce-work overload.
 *
 * <p>Note that {@code conf} is modified in place (input format class, event
 * payload flag, and per-work input name / merge file prefix).
 *
 * @param conf job configuration; mutated by this method
 * @param mergeJoinWork the merge-join work to build a vertex for
 * @param fs passed through to the delegated {@code createVertex} call
 * @param mrScratchDir scratch directory used when registering the merge work
 * @param ctx passed through to the delegated {@code createVertex} call
 * @param vertexType vertex type stored in the custom vertex configuration
 * @param localResources passed through to the delegated {@code createVertex} call
 * @return the vertex created for the merge-join work
 * @throws Exception declared by this method
 */
private Vertex createVertex(JobConf conf, MergeJoinWork mergeJoinWork, FileSystem fs, Path mrScratchDir, Context ctx, VertexType vertexType, Map<String, LocalResource> localResources) throws Exception {
  Utilities.setMergeWork(conf, mergeJoinWork, mrScratchDir, false);
  if (mergeJoinWork.getMainWork() instanceof MapWork) {
    List<BaseWork> mapWorkList = mergeJoinWork.getBaseWorkList();
    MapWork mainMapWork = (MapWork) (mergeJoinWork.getMainWork());
    Vertex mergeVx = createVertex(conf, mainMapWork, fs, mrScratchDir, ctx, vertexType, localResources);
    conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
    // mapreduce.tez.input.initializer.serialize.event.payload should be set
    // to false when using this plug-in to avoid getting a serialized event at run-time.
    conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
    for (BaseWork work : mapWorkList) {
      MapWork mapWork = (MapWork) work;
      // Each data source is built from conf as it stands for this particular work.
      conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName());
      conf.set(Utilities.INPUT_NAME, mapWork.getName());
      LOG.info("Going through each work and adding MultiMRInput");
      mergeVx.addDataSource(mapWork.getName(), MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
    }
    // To be populated for SMB joins only for all the small tables
    Map<String, Integer> inputToBucketMap = new HashMap<>();
    if (mergeJoinWork.getMergeJoinOperator().getParentOperators().size() == 1 && mergeJoinWork.getMergeJoinOperator().getOpTraits() != null) {
      // This is an SMB join.
      for (BaseWork work : mapWorkList) {
        MapWork mw = (MapWork) work;
        Map<String, Operator<?>> aliasToWork = mw.getAliasToWork();
        Preconditions.checkState(aliasToWork.size() == 1, "More than 1 alias in SMB mapwork");
        inputToBucketMap.put(mw.getName(), mw.getWorks().get(0).getOpTraits().getNumBuckets());
      }
    }
    VertexManagerPluginDescriptor desc = VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
    // the +1 to the size is because of the main work.
    CustomVertexConfiguration vertexConf = new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf().getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(), mapWorkList.size() + 1, inputToBucketMap);
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    // getData() exposes the whole backing array; only the first getLength()
    // bytes were written, so restrict the payload to that prefix.
    desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength())));
    mergeVx.setVertexManagerPlugin(desc);
    return mergeVx;
  } else {
    return createVertex(conf, (ReduceWork) mergeJoinWork.getMainWork(), fs, mrScratchDir, ctx, localResources);
  }
}
Aggregations