Use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.
In class DagUtils, method createEdge.
/**
* Given a VertexGroup and a vertex, createEdge creates a GroupInputEdge
* between them.
*
* @param group The parent VertexGroup
* @param vConf The job conf of one of the parent (grouped) vertices
* @param w The child vertex
* @param edgeProp The edge property of the connection between the two
* endpoints.
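* @param work The BaseWork of the child vertex w
* @param tezWork The TezWork used to look up the vertex type of work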
*/
@SuppressWarnings("rawtypes")
public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w,
    TezEdgeProperty edgeProp, BaseWork work, TezWork tezWork) throws IOException {
  Class mergeInputClass;

  LOG.info("Creating Edge between " + group.getGroupName() + " and " + w.getName());

  EdgeType edgeType = edgeProp.getEdgeType();
  switch (edgeType) {
  case BROADCAST_EDGE:
    mergeInputClass = ConcatenatedMergedKeyValueInput.class;
    break;
  case CUSTOM_EDGE: {
    mergeInputClass = ConcatenatedMergedKeyValueInput.class;
    int numBuckets = edgeProp.getNumBuckets();
    CustomVertexConfiguration vertexConf =
        new CustomVertexConfiguration(numBuckets, tezWork.getVertexType(work));
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    VertexManagerPluginDescriptor desc =
        VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
    byte[] userPayloadBytes = dob.getData();
    ByteBuffer userPayload = ByteBuffer.wrap(userPayloadBytes);
    desc.setUserPayload(UserPayload.create(userPayload));
    w.setVertexManagerPlugin(desc);
    break;
  }
  case CUSTOM_SIMPLE_EDGE:
    mergeInputClass = ConcatenatedMergedKeyValueInput.class;
    break;
  case ONE_TO_ONE_EDGE:
    mergeInputClass = ConcatenatedMergedKeyValueInput.class;
    break;
  case XPROD_EDGE:
    mergeInputClass = ConcatenatedMergedKeyValueInput.class;
    break;
  case SIMPLE_EDGE:
    setupAutoReducerParallelism(edgeProp, w);
    // fall through: SIMPLE_EDGE also uses the default merged input class
  default:
    mergeInputClass = TezMergedLogicalInput.class;
    break;
  }

  return GroupInputEdge.create(group, w,
      createEdgeProperty(w, edgeProp, vConf, work, tezWork),
      InputDescriptor.create(mergeInputClass.getName()));
}
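The CUSTOM_EDGE branch follows the standard Tez recipe for configuring a custom vertex manager: serialize a Writable configuration into a byte buffer, wrap it in a UserPayload, and hand it to the vertex through a VertexManagerPluginDescriptor. A minimal sketch of that recipe, with the hypothetical names PayloadUtil and attachPlugin standing in for the inline code above:

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.VertexManagerPluginDescriptor;

public final class PayloadUtil {
  // Serialize a Writable plugin configuration into a UserPayload and
  // attach the given VertexManagerPlugin class to the vertex.
  public static void attachPlugin(Vertex v, Writable pluginConfig,
      Class<?> pluginClass) throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    pluginConfig.write(dob); // Writable serialization, as in the snippet above
    // Wrap only the written bytes; getData() returns the whole backing array.
    ByteBuffer payload = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    VertexManagerPluginDescriptor desc =
        VertexManagerPluginDescriptor.create(pluginClass.getName());
    desc.setUserPayload(UserPayload.create(payload));
    v.setVertexManagerPlugin(desc);
  }
}

One small difference from the snippet above: the sketch wraps only the first getLength() bytes of the buffer, whereas Hive wraps the entire backing array returned by getData(). Both work, because readFields never reads past the bytes that write produced.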
Use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.
In class DagUtils, method createVertexFromMergeWork.
private Vertex createVertexFromMergeWork(JobConf conf, MergeJoinWork mergeJoinWork,
    Path mrScratchDir, VertexType vertexType) throws Exception {
  Utilities.setMergeWork(conf, mergeJoinWork, mrScratchDir, false);
  if (mergeJoinWork.getMainWork() instanceof MapWork) {
    List<BaseWork> mapWorkList = mergeJoinWork.getBaseWorkList();
    MapWork mapWork = (MapWork) (mergeJoinWork.getMainWork());
    Vertex mergeVx = createVertexFromMapWork(conf, mapWork, mrScratchDir, vertexType);

    Class<?> inputFormatClass = conf.getClass("mapred.input.format.class", HiveInputFormat.class);
    if (inputFormatClass != BucketizedHiveInputFormat.class
        && inputFormatClass != HiveInputFormat.class) {
      // As of now only these two formats are supported.
      inputFormatClass = HiveInputFormat.class;
    }
    conf.setClass("mapred.input.format.class", inputFormatClass, InputFormat.class);
    // mapreduce.tez.input.initializer.serialize.event.payload should be set
    // to false when using this plug-in to avoid getting a serialized event at run-time.
    conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
    for (int i = 0; i < mapWorkList.size(); i++) {
      mapWork = (MapWork) (mapWorkList.get(i));
      conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName());
      conf.set(Utilities.INPUT_NAME, mapWork.getName());
      LOG.info("Going through each work and adding MultiMRInput");
      mergeVx.addDataSource(mapWork.getName(),
          MultiMRInput.createConfigBuilder(conf, inputFormatClass).build());
    }

    // To be populated for SMB joins only for all the small tables
    Map<String, Integer> inputToBucketMap = new HashMap<>();
    if (mergeJoinWork.getMergeJoinOperator().getParentOperators().size() == 1
        && mergeJoinWork.getMergeJoinOperator().getOpTraits() != null) {
      // This is an SMB join.
      for (BaseWork work : mapWorkList) {
        MapWork mw = (MapWork) work;
        Map<String, Operator<?>> aliasToWork = mw.getAliasToWork();
        Preconditions.checkState(aliasToWork.size() == 1,
            "More than 1 alias in SMB mapwork");
        inputToBucketMap.put(mw.getName(),
            mw.getWorks().get(0).getOpTraits().getNumBuckets());
      }
    }
    VertexManagerPluginDescriptor desc =
        VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
    // the +1 to the size is because of the main work.
    CustomVertexConfiguration vertexConf =
        new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf().getNumBuckets(),
            vertexType, mergeJoinWork.getBigTableAlias(), mapWorkList.size() + 1, inputToBucketMap);
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    byte[] userPayload = dob.getData();
    desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload)));
    mergeVx.setVertexManagerPlugin(desc);
    return mergeVx;
  } else {
    return createVertexFromReduceWork(conf, (ReduceWork) mergeJoinWork.getMainWork(), mrScratchDir);
  }
}
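Both createEdge and createVertexFromMergeWork serialize the CustomVertexConfiguration with plain Hadoop Writable serialization, and CustomPartitionVertex reads it back from the user payload in its initialize() method. A self-contained sketch of that round trip, using a hypothetical BucketConfig Writable in place of CustomVertexConfiguration (only hadoop-common is needed on the classpath):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataInputByteBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;

// Hypothetical stand-in for CustomVertexConfiguration: a tiny Writable
// carrying only a bucket count.
class BucketConfig implements Writable {
  int numBuckets;

  BucketConfig() {
  }

  BucketConfig(int numBuckets) {
    this.numBuckets = numBuckets;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeInt(numBuckets);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    numBuckets = in.readInt();
  }
}

public class PayloadRoundTrip {
  public static void main(String[] args) throws IOException {
    // DAG-builder side, as in createVertexFromMergeWork above.
    DataOutputBuffer dob = new DataOutputBuffer();
    new BucketConfig(4).write(dob);
    ByteBuffer payload = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    // Plugin side: the vertex manager obtains the same bytes via
    // getContext().getUserPayload().getPayload() in initialize().
    DataInputByteBuffer in = new DataInputByteBuffer();
    in.reset(payload);
    BucketConfig read = new BucketConfig();
    read.readFields(in);
    System.out.println("numBuckets = " + read.numBuckets); // prints 4
  }
}

The DataOutputBuffer/DataInputByteBuffer pairing works for any Writable, so the payload plumbing stays the same no matter what fields the configuration carries.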
Use of org.apache.tez.dag.api.VertexManagerPluginDescriptor in project hive by apache.
In class DagUtils, method setupAutoReducerParallelism.
private void setupAutoReducerParallelism(TezEdgeProperty edgeProp, Vertex v) throws IOException {
  if (edgeProp.isAutoReduce()) {
    Configuration pluginConf = new Configuration(false);
    VertexManagerPluginDescriptor desc =
        VertexManagerPluginDescriptor.create(ShuffleVertexManager.class.getName());
    pluginConf.setBoolean(
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true);
    pluginConf.setInt(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM,
        edgeProp.getMinReducer());
    pluginConf.setLong(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
        edgeProp.getInputSizePerReducer());
    UserPayload payload = TezUtils.createUserPayloadFromConf(pluginConf);
    desc.setUserPayload(payload);
    v.setVertexManagerPlugin(desc);
  }
}
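Here the payload carries a full Hadoop Configuration rather than a hand-rolled Writable: TezUtils.createUserPayloadFromConf serializes it, and the plugin restores it on the other side with the inverse call, TezUtils.createConfFromUserPayload. A small sketch of that round trip (AutoParallelPayloadDemo is a hypothetical driver; it assumes tez-api and tez-runtime-library on the classpath):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager;

public class AutoParallelPayloadDemo {
  public static void main(String[] args) throws IOException {
    // DAG-builder side, as in setupAutoReducerParallelism above.
    Configuration pluginConf = new Configuration(false);
    pluginConf.setBoolean(
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true);
    pluginConf.setInt(
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM, 2);
    UserPayload payload = TezUtils.createUserPayloadFromConf(pluginConf);

    // Plugin side: the vertex manager restores the Configuration from the
    // payload during initialize().
    Configuration restored = TezUtils.createConfFromUserPayload(payload);
    System.out.println(restored.getBoolean(
        ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, false)); // true
  }
}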