use of org.apache.tez.client.TezClient in project tez by apache.
the class TestTezJobs method testOrderedWordCount.
/**
 * Runs the {@code OrderedWordCount} example against generated input on the
 * remote file system and verifies its output.
 *
 * <p>The staging directory is always deleted afterwards, whether or not the job
 * or the verification succeeded.
 *
 * @throws Exception if file-system setup, the job run, or output verification fails
 */
@Test(timeout = 60000)
public void testOrderedWordCount() throws Exception {
  String inputDirStr = "/tmp/owc-input/";
  Path inputDir = new Path(inputDirStr);
  Path stagingDirPath = new Path("/tmp/owc-staging-dir");
  remoteFs.mkdirs(inputDir);
  remoteFs.mkdirs(stagingDirPath);
  generateOrderedWordCountInput(inputDir, remoteFs);

  String outputDirStr = "/tmp/owc-output/";
  Path outputDir = new Path(outputDirStr);

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());

  // No TezClient is created here: the job object is driven directly through run(),
  // so the only cleanup this test owns is the staging directory.
  try {
    OrderedWordCount job = new OrderedWordCount();
    String[] jobArgs = new String[] { "-counter", inputDirStr, outputDirStr, "2" };
    int exitCode = job.run(tezConf, jobArgs, null);
    Assert.assertTrue("OrderedWordCount failed", exitCode == 0);
    verifyOutput(outputDir, remoteFs);
  } finally {
    remoteFs.delete(stagingDirPath, true);
  }
}
use of org.apache.tez.client.TezClient in project tez by apache.
the class TestTezJobs method testMultipleCommits_OnDAGSuccess.
/**
 * Runs {@code MultipleCommitsExample} with a random number (1 to 10) of outputs
 * for each of the four output path prefixes, then verifies the commits for every
 * prefix.
 *
 * <p>The staging directory is always deleted afterwards.
 *
 * @throws Exception if the job run, commit verification, or cleanup fails
 */
@Test(timeout = 60000)
public void testMultipleCommits_OnDAGSuccess() throws Exception {
  Path stagingDirPath = new Path("/tmp/commit-staging-dir");
  Random rand = new Random();

  // Each output group gets between 1 and 10 outputs (nextInt(10) is 0..9).
  String v1OutputPathPrefix = "/tmp/commit-output-v1";
  int v1OutputNum = rand.nextInt(10) + 1;
  String v2OutputPathPrefix = "/tmp/commit-output-v2";
  int v2OutputNum = rand.nextInt(10) + 1;
  String uv12OutputPathPrefix = "/tmp/commit-output-uv12";
  int uv12OutputNum = rand.nextInt(10) + 1;
  String v3OutputPathPrefix = "/tmp/commit-output-v3";
  int v3OutputNum = rand.nextInt(10) + 1;

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());

  // No TezClient is created here: the job object is driven directly through run(),
  // so the only cleanup this test owns is the staging directory.
  try {
    MultipleCommitsExample job = new MultipleCommitsExample();
    String[] jobArgs = new String[] {
        v1OutputPathPrefix, String.valueOf(v1OutputNum),
        v2OutputPathPrefix, String.valueOf(v2OutputNum),
        uv12OutputPathPrefix, String.valueOf(uv12OutputNum),
        v3OutputPathPrefix, String.valueOf(v3OutputNum)
    };
    int exitCode = job.run(tezConf, jobArgs, null);
    Assert.assertTrue("MultipleCommitsExample failed", exitCode == 0);

    verifyCommits(v1OutputPathPrefix, v1OutputNum);
    verifyCommits(v2OutputPathPrefix, v2OutputNum);
    verifyCommits(uv12OutputPathPrefix, uv12OutputNum);
    verifyCommits(v3OutputPathPrefix, v3OutputNum);
  } finally {
    remoteFs.delete(stagingDirPath, true);
  }
}
use of org.apache.tez.client.TezClient in project tez by apache.
the class FilterLinesByWord method run.
/**
 * Builds and runs the two-stage "FilterLinesByWord" DAG in a Tez session.
 *
 * <p>Expected arguments (after generic options and the splits-in-client option
 * have been consumed): {@code <inputPath> <outputPath> <filterWord>}.
 *
 * <p>The method creates a fresh staging directory under the file system's working
 * directory, uploads the jar containing this class to it, starts a Tez session,
 * submits the DAG and polls its status until it leaves the RUNNING state. Once the
 * session has been started, the staging directory is deleted and the session is
 * stopped on every exit path, including failures while building or submitting
 * the DAG.
 *
 * @param args command-line arguments
 * @return {@code 0} if the DAG finished in state SUCCEEDED; {@code 1} if it finished
 *         in any other state; {@code 2} on invalid usage or when the output path
 *         already exists; {@code -1} if fetching progress failed with a
 *         {@code TezException} while the DAG was running
 * @throws Exception if file-system access, session start-up, DAG submission, or
 *         status retrieval fails
 */
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  Credentials credentials = new Credentials();

  boolean generateSplitsInClient = false;
  SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
  try {
    generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
    otherArgs = splitCmdLineParser.getRemainingArgs();
  } catch (ParseException e1) {
    System.err.println("Invalid options");
    printUsage();
    return 2;
  }

  if (otherArgs.length != 3) {
    printUsage();
    return 2;
  }

  String inputPath = otherArgs[0];
  String outputPath = otherArgs[1];
  String filterWord = otherArgs[2];

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(new Path(outputPath))) {
    System.err.println("Output directory : " + outputPath + " already exists");
    return 2;
  }

  TezConfiguration tezConf = new TezConfiguration(conf);

  // A random UUID keeps the staging directory unique per invocation.
  Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
  TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

  String jarPath = ClassUtil.findContainingJar(FilterLinesByWord.class);
  if (jarPath == null) {
    throw new TezUncheckedException("Could not find any jar containing "
        + FilterLinesByWord.class.getName() + " in the classpath");
  }

  // Upload the job jar to the staging directory and register it as a local resource.
  Path remoteJarPath = fs.makeQualified(new Path(stagingDir, "dag_job.jar"));
  fs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
  FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
  TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, conf);

  Map<String, LocalResource> commonLocalResources = new TreeMap<String, LocalResource>();
  LocalResource dagJarLocalRsrc = LocalResource.newInstance(
      ConverterUtils.getYarnUrlFromPath(remoteJarPath),
      LocalResourceType.FILE,
      LocalResourceVisibility.APPLICATION,
      remoteJarStatus.getLen(),
      remoteJarStatus.getModificationTime());
  commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

  TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf,
      commonLocalResources, credentials);
  // Why do I need to start the TezSession.
  tezSession.start();

  DAGClient dagClient;
  DAGStatus dagStatus = null;
  String[] vNames = { "stage1", "stage2" };
  // Everything after start() is guarded so that a failure while building or
  // submitting the DAG still stops the session and removes the staging directory.
  try {
    Configuration stage1Conf = new JobConf(conf);
    stage1Conf.set(FILTER_PARAM_NAME, filterWord);

    Configuration stage2Conf = new JobConf(conf);
    stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
    stage2Conf.setBoolean("mapred.mapper.new-api", false);

    UserPayload stage1Payload = TezUtils.createUserPayloadFromConf(stage1Conf);

    // Setup stage1 Vertex
    Vertex stage1Vertex = Vertex.create("stage1",
        ProcessorDescriptor.create(FilterByWordInputProcessor.class.getName())
            .setUserPayload(stage1Payload))
        .addTaskLocalFiles(commonLocalResources);

    DataSourceDescriptor dsd;
    if (generateSplitsInClient) {
      // TODO TEZ-1406. Don't use MRInputLegacy
      stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
      stage1Conf.setBoolean("mapred.mapper.new-api", false);
      dsd = MRInputHelpers.configureMRInputWithLegacySplitGeneration(stage1Conf, stagingDir, true);
    } else {
      dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath)
          .groupSplits(false)
          .build();
    }
    stage1Vertex.addDataSource("MRInput", dsd);

    // Setup stage2 Vertex
    Vertex stage2Vertex = Vertex.create("stage2",
        ProcessorDescriptor.create(FilterByWordOutputProcessor.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf)),
        1);
    stage2Vertex.addTaskLocalFiles(commonLocalResources);

    // Configure the Output for stage2
    OutputDescriptor od = OutputDescriptor.create(MROutput.class.getName())
        .setUserPayload(TezUtils.createUserPayloadFromConf(stage2Conf));
    OutputCommitterDescriptor ocd =
        OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    stage2Vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, ocd, null));

    UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
        .newBuilder(Text.class.getName(), TextLongPair.class.getName())
        .setFromConfiguration(tezConf)
        .build();

    DAG dag = DAG.create("FilterLinesByWord");
    Edge edge = Edge.create(stage1Vertex, stage2Vertex,
        edgeConf.createDefaultBroadcastEdgeProperty());
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    dagClient = tezSession.submitDAG(dag);
    LOG.info("Submitted DAG to Tez Session");

    // Phase 1: wait until the DAG is RUNNING or has already reached a final state.
    while (true) {
      dagStatus = dagClient.getDAGStatus(null);
      DAGStatus.State state = dagStatus.getState();
      if (state == DAGStatus.State.RUNNING
          || state == DAGStatus.State.SUCCEEDED
          || state == DAGStatus.State.FAILED
          || state == DAGStatus.State.KILLED
          || state == DAGStatus.State.ERROR) {
        break;
      }
      try {
        Thread.sleep(500);
      } catch (InterruptedException ignored) {
        // Deliberately ignored: keep polling until the DAG reaches a known state.
      }
    }

    // Phase 2: report progress once a second for as long as the DAG is RUNNING.
    while (dagStatus.getState() == DAGStatus.State.RUNNING) {
      try {
        ExampleDriver.printDAGStatus(dagClient, vNames);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ignored) {
          // Deliberately ignored: keep polling until the DAG leaves RUNNING.
        }
        dagStatus = dagClient.getDAGStatus(null);
      } catch (TezException e) {
        LOG.error("Failed to get application progress. Exiting", e);
        return -1;
      }
    }

    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
  } finally {
    // Same order as before (staging dir first, then session), but the session is
    // now stopped even if deleting the staging directory throws.
    try {
      fs.delete(stagingDir, true);
    } finally {
      tezSession.stop();
    }
  }

  // NOTE(review): this status print runs after the session has been stopped;
  // confirm the DAG client is still expected to answer at this point.
  ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
  LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
  return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}
use of org.apache.tez.client.TezClient in project tez by apache.
the class TestTaskErrorsUsingLocalMode method getTezClient.
/**
 * Creates and starts a {@code TezClient} configured for Tez local mode on the
 * local file system ({@code file:///}).
 *
 * <p>The configuration also enables the local-fetch runtime optimisation and sets
 * the AM sleep-before-exit time to 500 ms.
 *
 * @param name name passed to {@code TezClient.create}
 * @return the client, already started
 * @throws IOException if starting the client fails with an I/O error
 * @throws TezException if starting the client fails with a Tez error
 */
private TezClient getTezClient(String name) throws IOException, TezException {
  final TezConfiguration localModeConf = new TezConfiguration();
  localModeConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
  localModeConf.set("fs.defaultFS", "file:///");
  localModeConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);
  localModeConf.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 500);

  // NOTE(review): the third argument is presumably the session-mode flag;
  // confirm against the TezClient.create overloads.
  final TezClient startedClient = TezClient.create(name, localModeConf, true);
  startedClient.start();
  return startedClient;
}
use of org.apache.tez.client.TezClient in project tez by apache.
the class TestTaskErrorsUsingLocalMode method testSelfKillReported.
/**
 * Submits a single-vertex DAG whose processor is configured via
 * {@code FailingProcessor.configureForKilled(10)}, waits for it to complete, and
 * asserts that the DAG ends in state SUCCEEDED with a killed task attempt count
 * of 10 reported for the vertex.
 *
 * <p>The DAG client (if one was obtained) is closed and the Tez client is stopped
 * afterwards.
 */
@Test(timeout = 20000)
public void testSelfKillReported() throws IOException, TezException, InterruptedException {
  TezClient tezClient = getTezClient("testSelfKillReported");
  DAGClient dagClient = null;
  try {
    FailingProcessor.configureForKilled(10);

    // Build the DAG step by step: one vertex with a single task.
    DAG emptyDag = DAG.create("testSelfKillReported");
    ProcessorDescriptor failingProcessor =
        ProcessorDescriptor.create(FailingProcessor.class.getName());
    Vertex onlyVertex = Vertex.create(VERTEX_NAME, failingProcessor, 1);
    DAG dag = emptyDag.addVertex(onlyVertex);

    dagClient = tezClient.submitDAG(dag);
    dagClient.waitForCompletion();

    DAGStatus.State finalState = dagClient.getDAGStatus(null).getState();
    assertEquals(DAGStatus.State.SUCCEEDED, finalState);
    assertEquals(10,
        dagClient.getVertexStatus(VERTEX_NAME, null).getProgress().getKilledTaskAttemptCount());
  } finally {
    if (dagClient != null) {
      dagClient.close();
    }
    tezClient.stop();
  }
}
Aggregations