use of org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig in project tez by apache.
the class CartesianProduct method createDAG.
private DAG createDAG(TezConfiguration tezConf, String inputPath1, String inputPath2, String inputPath3, String outputPath, boolean isPartitioned) throws IOException {
Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
// turn off groupSplit so that each input file incurs one task
v1.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath1).groupSplits(false).build());
Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v2.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath2).groupSplits(false).build());
Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v3.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath3).groupSplits(false).build());
CartesianProductConfig cartesianProductConfig;
if (isPartitioned) {
Map<String, Integer> vertexPartitionMap = new HashMap<>();
for (String vertex : cpSources) {
vertexPartitionMap.put(vertex, numPartition);
}
cartesianProductConfig = new CartesianProductConfig(vertexPartitionMap);
} else {
cartesianProductConfig = new CartesianProductConfig(Arrays.asList(cpSources));
}
UserPayload userPayload = cartesianProductConfig.toUserPayload(tezConf);
Vertex v4 = Vertex.create(VERTEX4, ProcessorDescriptor.create(JoinProcessor.class.getName()));
v4.addDataSink(OUTPUT, MROutput.createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath).build());
v4.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName()).setUserPayload(userPayload));
EdgeManagerPluginDescriptor cpEdgeManager = EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
cpEdgeManager.setUserPayload(userPayload);
EdgeProperty cpEdgeProperty;
if (isPartitioned) {
UnorderedPartitionedKVEdgeConfig cpEdgeConf = UnorderedPartitionedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName(), CustomPartitioner.class.getName()).build();
cpEdgeProperty = cpEdgeConf.createDefaultCustomEdgeProperty(cpEdgeManager);
} else {
UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
cpEdgeProperty = edgeConf.createDefaultCustomEdgeProperty(cpEdgeManager);
}
EdgeProperty broadcastEdgeProperty;
UnorderedKVEdgeConfig broadcastEdgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
broadcastEdgeProperty = broadcastEdgeConf.createDefaultBroadcastEdgeProperty();
return DAG.create("CartesianProduct").addVertex(v1).addVertex(v2).addVertex(v3).addVertex(v4).addEdge(Edge.create(v1, v4, cpEdgeProperty)).addEdge(Edge.create(v2, v4, cpEdgeProperty)).addEdge(Edge.create(v3, v4, broadcastEdgeProperty));
}
use of org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig in project tez by apache.
the class FilterLinesByWord method run.
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
Credentials credentials = new Credentials();
boolean generateSplitsInClient = false;
SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
try {
generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
otherArgs = splitCmdLineParser.getRemainingArgs();
} catch (ParseException e1) {
System.err.println("Invalid options");
printUsage();
return 2;
}
if (otherArgs.length != 3) {
printUsage();
return 2;
}
String inputPath = otherArgs[0];
String outputPath = otherArgs[1];
String filterWord = otherArgs[2];
FileSystem fs = FileSystem.get(conf);
if (fs.exists(new Path(outputPath))) {
System.err.println("Output directory : " + outputPath + " already exists");
return 2;
}
TezConfiguration tezConf = new TezConfiguration(conf);
fs.getWorkingDirectory();
Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
if (jarPath == null) {
throw new TezUncheckedException("Could not find any jar containing" + FilterLinesByWord.class.getName() + " in the classpath");
}
Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);
Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(), remoteJarStatus.getModificationTime());
commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, credentials);
// Why do I need to start the TezSession.
tezSession.start();
Configuration stage1Conf = new JobConf(conf);
stage1Conf.set(FILTER_PARAM_NAME, filterWord);
Configuration stage2Conf = new JobConf(conf);
stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
stage2Conf.setBoolean("mapred.mapper.new-api", false);
UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);
// Setup stage1 Vertex
Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor.create(FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload)).addTaskLocalFiles(commonLocalResources);
DataSourceDescriptor dsd;
if (generateSplitsInClient) {
// TODO TEZ-1406. Dont' use MRInputLegacy
stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
stage1Conf.setBoolean("mapred.mapper.new-api", false);
dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
} else {
dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath).groupSplits(false).build();
}
stage1Vertex.addDataSource("MRInput", dsd);
// Setup stage2 Vertex
Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)), 1);
stage2Vertex.addTaskLocalFiles(commonLocalResources);
// Configure the Output for stage2
OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));
UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), TextLongPair.class.getName()).setFromConfiguration(tezConf).build();
DAG dag = DAG.create("FilterLinesByWord");
Edge edge = Edge.create(stage1Vertex, stage2Vertex, edgeConf.createDefaultBroadcastEdgeProperty());
dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);
LOG.info("Submitting DAG to Tez Session");
DAGClient dagClient = tezSession.submitDAG(dag);
LOG.info("Submitted DAG to Tez Session");
DAGStatus dagStatus = null;
String[] vNames = { "stage1", "stage2" };
try {
while (true) {
dagStatus = dagClient.getDAGStatus(null);
if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED || dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) {
break;
}
try {
Thread.sleep(500);
} catch (InterruptedException e) {
// continue;
}
}
while (dagStatus.getState() == DAGStatus.State.RUNNING) {
try {
ExampleDriver.printDAGStatus(dagClient, vNames);
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// continue;
}
dagStatus = dagClient.getDAGStatus(null);
} catch (TezException e) {
LOG.error("Failed to get application progress. Exiting");
return -1;
}
}
dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
} finally {
fs.delete(stagingDir, true);
tezSession.stop();
}
ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
use of org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig in project tez by apache.
the class BroadcastAndOneToOneExample method createDAG.
private DAG createDAG(FileSystem fs, TezConfiguration tezConf, Path stagingDir, boolean doLocalityCheck) throws IOException, YarnException {
int numBroadcastTasks = 2;
int numOneToOneTasks = 3;
if (doLocalityCheck) {
YarnClient yarnClient = YarnClient.createYarnClient();
yarnClient.init(tezConf);
yarnClient.start();
int numNMs = yarnClient.getNodeReports(NodeState.RUNNING).size();
yarnClient.stop();
// create enough 1-1 tasks to run in parallel
// 1 AM
numOneToOneTasks = numNMs - numBroadcastTasks - 1;
if (numOneToOneTasks < 1) {
numOneToOneTasks = 1;
}
}
byte[] procByte = { (byte) (doLocalityCheck ? 1 : 0), 1 };
UserPayload procPayload = UserPayload.create(ByteBuffer.wrap(procByte));
System.out.println("Using " + numOneToOneTasks + " 1-1 tasks");
Vertex broadcastVertex = Vertex.create("Broadcast", ProcessorDescriptor.create(InputProcessor.class.getName()), numBroadcastTasks);
Vertex inputVertex = Vertex.create("Input", ProcessorDescriptor.create(InputProcessor.class.getName()).setUserPayload(procPayload), numOneToOneTasks);
Vertex oneToOneVertex = Vertex.create("OneToOne", ProcessorDescriptor.create(OneToOneProcessor.class.getName()).setUserPayload(procPayload));
oneToOneVertex.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(InputReadyVertexManager.class.getName()));
UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).setFromConfiguration(tezConf).build();
DAG dag = DAG.create("BroadcastAndOneToOneExample");
dag.addVertex(inputVertex).addVertex(broadcastVertex).addVertex(oneToOneVertex).addEdge(Edge.create(inputVertex, oneToOneVertex, edgeConf.createDefaultOneToOneEdgeProperty())).addEdge(Edge.create(broadcastVertex, oneToOneVertex, edgeConf.createDefaultBroadcastEdgeProperty()));
return dag;
}
Aggregations