use of org.apache.tez.dag.api.GroupInputEdge in project hive by apache.
the class TezTask method build.
DAG build(JobConf conf, TezWork work, Path scratchDir, LocalResource appJarLr, List<LocalResource> additionalLr, Context ctx) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
  // getAllWork returns a topologically sorted list, which we use to make
  // sure that vertices are created before they are used in edges.
  List<BaseWork> ws = work.getAllWork();
  Collections.reverse(ws);
  FileSystem fs = scratchDir.getFileSystem(conf);
  // the name of the dag is what is displayed in the AM/Job UI
  String dagName = utils.createDagName(conf, queryPlan);
  LOG.info("Dag name: " + dagName);
  DAG dag = DAG.create(dagName);
  // set some info for the query
  JSONObject json = new JSONObject(new LinkedHashMap()).put("context", "Hive").put("description", ctx.getCmd());
  String dagInfo = json.toString();
  if (LOG.isDebugEnabled()) {
    LOG.debug("DagInfo: " + dagInfo);
  }
  dag.setDAGInfo(dagInfo);
  dag.setCredentials(conf.getCredentials());
  setAccessControlsForCurrentUser(dag, queryPlan.getQueryId(), conf);
  for (BaseWork w : ws) {
    boolean isFinal = work.getLeaves().contains(w);
    // translate work to vertex
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
    if (w instanceof UnionWork) {
      // Special case for unions. These items translate to VertexGroups
      List<BaseWork> unionWorkItems = new LinkedList<BaseWork>();
      List<BaseWork> children = new LinkedList<BaseWork>();
      // proper children of the union
      for (BaseWork v : work.getChildren(w)) {
        EdgeType type = work.getEdgeProperty(w, v).getEdgeType();
        if (type == EdgeType.CONTAINS) {
          unionWorkItems.add(v);
        } else {
          children.add(v);
        }
      }
      // create VertexGroup
      Vertex[] vertexArray = new Vertex[unionWorkItems.size()];
      int i = 0;
      for (BaseWork v : unionWorkItems) {
        vertexArray[i++] = workToVertex.get(v);
      }
      VertexGroup group = dag.createVertexGroup(w.getName(), vertexArray);
      // For a vertex group, all Outputs use the same Key-class, Val-class and partitioner.
      // Pick any one source vertex to figure out the Edge configuration.
      JobConf parentConf = workToConf.get(unionWorkItems.get(0));
      // now hook up the children
      for (BaseWork v : children) {
        // finally we can create the grouped edge
        GroupInputEdge e = utils.createEdge(group, parentConf, workToVertex.get(v), work.getEdgeProperty(w, v), work.getVertexType(v));
        dag.addEdge(e);
      }
    } else {
      // Regular vertices
      JobConf wxConf = utils.initializeVertexConf(conf, ctx, w);
      Vertex wx = utils.createVertex(wxConf, w, scratchDir, appJarLr, additionalLr, fs, ctx, !isFinal, work, work.getVertexType(w));
      if (w.getReservedMemoryMB() > 0) {
        // If reservedMemoryMB is set, make memory allocation fraction adjustment as needed
        double frac = DagUtils.adjustMemoryReserveFraction(w.getReservedMemoryMB(), super.conf);
        LOG.info("Setting " + TEZ_MEMORY_RESERVE_FRACTION + " to " + frac);
        wx.setConf(TEZ_MEMORY_RESERVE_FRACTION, Double.toString(frac));
      }
      // Otherwise just leave it up to Tez to decide how much memory to allocate
      dag.addVertex(wx);
      utils.addCredentials(w, dag);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
      workToVertex.put(w, wx);
      workToConf.put(w, wxConf);
      // add all dependencies (i.e.: edges) to the graph
      for (BaseWork v : work.getChildren(w)) {
        assert workToVertex.containsKey(v);
        Edge e = null;
        TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);
        e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, work.getVertexType(v));
        dag.addEdge(e);
      }
    }
  }
  // Clear the work map after build. TODO: remove caching instead?
  Utilities.clearWorkMap(conf);
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
  return dag;
}
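
In the union branch above, the GroupInputEdge returned by utils.createEdge wraps the plain Tez API. The snippet below is a minimal sketch of that wiring, not Hive's DagUtils implementation: the helper name buildUnionEdge and the descriptor class-name strings are illustrative placeholders.

import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.EdgeProperty.DataMovementType;
import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
import org.apache.tez.dag.api.GroupInputEdge;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.VertexGroup;

// Illustrative sketch: group the union's member vertices and connect the group
// to one consumer vertex with a single GroupInputEdge, as build() does per child.
static GroupInputEdge buildUnionEdge(DAG dag, String unionName, Vertex[] members, Vertex consumer) {
  VertexGroup group = dag.createVertexGroup(unionName, members);
  // All members of a vertex group must produce compatible output (same key/value
  // classes and partitioner), which is why build() derives the edge from one parent conf.
  EdgeProperty edgeProp = EdgeProperty.create(
      DataMovementType.SCATTER_GATHER,                  // shuffle-style data movement
      DataSourceType.PERSISTED,                         // producer output is persisted
      SchedulingType.SEQUENTIAL,                        // consumer scheduled after producers
      OutputDescriptor.create("producer.output.class"), // placeholder class name
      InputDescriptor.create("consumer.input.class"));  // placeholder class name
  // The extra InputDescriptor tells the consumer how to merge the grouped inputs
  // into one logical input.
  return GroupInputEdge.create(group, consumer, edgeProp,
      InputDescriptor.create("merged.input.class"));    // placeholder class name
}

The caller then adds the returned edge with dag.addEdge, exactly as the loop over children does above.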
use of org.apache.tez.dag.api.GroupInputEdge in project tez by apache.
the class TestDAGImpl method createGroupDAGPlan.
// Create a plan with 3 vertices: A, B, C. Group(A,B)->C
static DAGPlan createGroupDAGPlan() {
  LOG.info("Setting up group dag plan");
  int dummyTaskCount = 1;
  Resource dummyTaskResource = Resource.newInstance(1, 1);
  org.apache.tez.dag.api.Vertex v1 = org.apache.tez.dag.api.Vertex.create("vertex1", ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource);
  org.apache.tez.dag.api.Vertex v2 = org.apache.tez.dag.api.Vertex.create("vertex2", ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource);
  org.apache.tez.dag.api.Vertex v3 = org.apache.tez.dag.api.Vertex.create("vertex3", ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource);
  DAG dag = DAG.create("testDag");
  String groupName1 = "uv12";
  OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(TotalCountingOutputCommitter.class.getName());
  org.apache.tez.dag.api.VertexGroup uv12 = dag.createVertexGroup(groupName1, v1, v2);
  OutputDescriptor outDesc = OutputDescriptor.create("output.class");
  uv12.addDataSink("uvOut", DataSinkDescriptor.create(outDesc, ocd, null));
  v3.addDataSink("uvOut", DataSinkDescriptor.create(outDesc, ocd, null));
  GroupInputEdge e1 = GroupInputEdge.create(uv12, v3,
      EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
          OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class")),
      InputDescriptor.create("merge.class"));
  dag.addVertex(v1);
  dag.addVertex(v2);
  dag.addVertex(v3);
  dag.addEdge(e1);
  return dag.createDag(conf, null, null, null, true);
}
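
The last argument to GroupInputEdge.create above ("merge.class") names the Input the consumer vertex instantiates to read all grouped sources as a single logical input. Outside of a unit test a concrete merged-input class is normally supplied; the short sketch below is not taken from TestDAGImpl and assumes the tez-runtime-library class ConcatenatedMergedKeyValueInput is on the classpath.

import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.GroupInputEdge;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.VertexGroup;
import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput;

// Hedged sketch: same shape as e1 above, but naming a concrete merged-input class
// instead of the "merge.class" placeholder used by the test.
static GroupInputEdge groupEdgeWithConcreteMerge(VertexGroup group, Vertex consumer, EdgeProperty scatterGatherProp) {
  return GroupInputEdge.create(group, consumer, scatterGatherProp,
      InputDescriptor.create(ConcatenatedMergedKeyValueInput.class.getName()));
}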
use of org.apache.tez.dag.api.GroupInputEdge in project hive by apache.
the class TezTask method build.
DAG build(JobConf conf, TezWork tezWork, Path scratchDir, Context ctx, Map<String, LocalResource> vertexResources) throws Exception {
  perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
  // getAllWork returns a topologically sorted list, which we use to make
  // sure that vertices are created before they are used in edges.
  List<BaseWork> topologicalWorkList = tezWork.getAllWork();
  Collections.reverse(topologicalWorkList);
  // the name of the dag is what is displayed in the AM/Job UI
  String dagName = utils.createDagName(conf, queryPlan);
  LOG.info("Dag name: {}", dagName);
  DAG dag = DAG.create(dagName);
  // set some info for the query
  JSONObject json = new JSONObject(new LinkedHashMap<>()).put("context", "Hive").put("description", ctx.getCmd());
  String dagInfo = json.toString();
  String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
  dag.setConf(HiveConf.ConfVars.HIVEQUERYID.varname, queryId);
  LOG.debug("DagInfo: {}", dagInfo);
  dag.setDAGInfo(dagInfo);
  dag.setCredentials(conf.getCredentials());
  setAccessControlsForCurrentUser(dag, queryPlan.getQueryId(), conf);
  for (BaseWork workUnit : topologicalWorkList) {
    // translate work to vertex
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + workUnit.getName());
    if (workUnit instanceof UnionWork) {
      // Special case for unions. These items translate to VertexGroups
      List<BaseWork> unionWorkItems = new LinkedList<BaseWork>();
      List<BaseWork> children = new LinkedList<BaseWork>();
      // proper children of the union
      for (BaseWork v : tezWork.getChildren(workUnit)) {
        EdgeType type = tezWork.getEdgeProperty(workUnit, v).getEdgeType();
        if (type == EdgeType.CONTAINS) {
          unionWorkItems.add(v);
        } else {
          children.add(v);
        }
      }
      JobConf parentConf = workToConf.get(unionWorkItems.get(0));
      checkOutputSpec(workUnit, parentConf);
      // create VertexGroup
      Vertex[] vertexArray = new Vertex[unionWorkItems.size()];
      int i = 0;
      for (BaseWork v : unionWorkItems) {
        vertexArray[i++] = workToVertex.get(v);
      }
      VertexGroup group = dag.createVertexGroup(workUnit.getName(), vertexArray);
      // now hook up the children
      for (BaseWork v : children) {
        // finally we can create the grouped edge
        GroupInputEdge e = utils.createEdge(group, parentConf, workToVertex.get(v), tezWork.getEdgeProperty(workUnit, v), v, tezWork);
        dag.addEdge(e);
      }
    } else {
      // Regular vertices
      JobConf wxConf = utils.initializeVertexConf(conf, ctx, workUnit);
      checkOutputSpec(workUnit, wxConf);
      Vertex wx = utils.createVertex(wxConf, workUnit, scratchDir, tezWork, vertexResources);
      if (tezWork.getChildren(workUnit).size() > 1) {
        String tezRuntimeSortMb = wxConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB);
        int originalValue = 0;
        if (tezRuntimeSortMb == null) {
          originalValue = TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB_DEFAULT;
        } else {
          originalValue = Integer.valueOf(tezRuntimeSortMb);
        }
        int newValue = originalValue / tezWork.getChildren(workUnit).size();
        wxConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, Integer.toString(newValue));
        LOG.info("Modified " + TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB + " to " + newValue);
      }
      if (workUnit.getReservedMemoryMB() > 0) {
        // If reservedMemoryMB is set, make memory allocation fraction adjustment as needed
        double frac = DagUtils.adjustMemoryReserveFraction(workUnit.getReservedMemoryMB(), super.conf);
        LOG.info("Setting " + TEZ_MEMORY_RESERVE_FRACTION + " to " + frac);
        wx.setConf(TEZ_MEMORY_RESERVE_FRACTION, Double.toString(frac));
      }
      // Otherwise just leave it up to Tez to decide how much memory to allocate
      dag.addVertex(wx);
      utils.addCredentials(workUnit, dag, conf);
      perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + workUnit.getName());
      workToVertex.put(workUnit, wx);
      workToConf.put(workUnit, wxConf);
      // add all dependencies (i.e.: edges) to the graph
      for (BaseWork v : tezWork.getChildren(workUnit)) {
        assert workToVertex.containsKey(v);
        Edge e = null;
        TezEdgeProperty edgeProp = tezWork.getEdgeProperty(workUnit, v);
        e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, v, tezWork);
        dag.addEdge(e);
      }
    }
  }
  // Clear the work map after build. TODO: remove caching instead?
  Utilities.clearWorkMap(conf);
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
  return dag;
}
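
When a vertex has more than one child edge, the newer build() above splits the configured sort buffer evenly across those outgoing outputs. A standalone sketch of just that arithmetic follows; the helper name divideSortBuffer is illustrative and not part of TezTask.

import org.apache.hadoop.conf.Configuration;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;

// Illustrative helper: give each of a vertex's N outgoing sorted outputs an equal
// share of the io.sort.mb buffer, mirroring the adjustment in build().
static void divideSortBuffer(Configuration vertexConf, int childCount) {
  String configured = vertexConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB);
  int original = (configured == null)
      ? TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB_DEFAULT
      : Integer.parseInt(configured);
  int perChild = original / childCount;
  vertexConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, Integer.toString(perChild));
}

For example, a vertex configured with a 300 MB sort buffer and three children would end up with 100 MB per outgoing output.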