Example use of org.apache.tez.examples.OrderedWordCount in the Apache Tez project.
From class TestShuffleHandlerJobs, method testOrderedWordCount.
/**
 * Runs OrderedWordCount inside a started Tez session with
 * TEZ_AM_DAG_CLEANUP_ON_COMPLETION enabled and the Tez shuffle handler configured as the
 * auxiliary service, then checks the node-manager local directories.
 *
 * <p>After the job output is verified, the session is stopped and the test polls the
 * ResourceManager until the application reports FINISHED. For each of the NUM_NMS node
 * managers it then asserts that the {@code dag_1} directory under the application's
 * appcache directory no longer exists while the application directory itself still does.
 *
 * @throws Exception if cluster setup, the job run, or any verification step fails
 */
@Test(timeout = 300000)
public void testOrderedWordCount() throws Exception {
  String inputDirStr = "/tmp/owc-input/";
  Path inputDir = new Path(inputDirStr);
  Path stagingDirPath = new Path("/tmp/owc-staging-dir");
  remoteFs.mkdirs(inputDir);
  remoteFs.mkdirs(stagingDirPath);
  generateOrderedWordCountInput(inputDir, remoteFs);
  String outputDirStr = "/tmp/owc-output/";
  Path outputDir = new Path(outputDirStr);
  TezConfiguration tezConf = new TezConfiguration(tezCluster.getConfig());
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
  tezConf.set(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, ShuffleHandler.TEZ_SHUFFLE_SERVICEID);
  tezConf.setBoolean(TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION, true);
  tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);
  tezConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, false);
  tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, false);
  TezClient tezSession = TezClient.create("WordCountTest", tezConf);
  tezSession.start();
  // Tracks whether stop() has already been attempted so the finally block never stops twice.
  boolean sessionStopAttempted = false;
  try {
    final OrderedWordCount job = new OrderedWordCount();
    Assert.assertTrue("OrderedWordCount failed", job.run(tezConf, new String[] { "-counter", inputDirStr, outputDirStr, "10" }, tezSession) == 0);
    verifyOutput(outputDir, remoteFs);
    // The session must be stopped before the directory checks below, which run only
    // once the application has reached the FINISHED state.
    sessionStopAttempted = true;
    tezSession.stop();
    ClientRMService rmService = tezCluster.getResourceManager().getClientRMService();
    boolean isAppComplete = false;
    while (!isAppComplete) {
      GetApplicationReportResponse resp = rmService.getApplicationReport(new GetApplicationReportRequest() {
        @Override
        public ApplicationId getApplicationId() {
          return job.getAppId();
        }

        @Override
        public void setApplicationId(ApplicationId applicationId) {
          // Intentionally empty: the id is always taken from the job.
        }
      });
      YarnApplicationState appState = resp.getApplicationReport().getYarnApplicationState();
      if (appState == YarnApplicationState.FINISHED) {
        isAppComplete = true;
      } else if (appState == YarnApplicationState.FAILED || appState == YarnApplicationState.KILLED) {
        // These states are terminal too; without this check the loop would spin until
        // the JUnit timeout instead of reporting what actually happened.
        Assert.fail("Application " + job.getAppId() + " ended in state " + appState);
      }
      Thread.sleep(100);
    }
    for (int i = 0; i < NUM_NMS; i++) {
      String appPath = tezCluster.getTestWorkDir() + "/" + this.getClass().getName() + "-localDir-nm-" + i + "_0/usercache/" + UserGroupInformation.getCurrentUser().getUserName() + "/appcache/" + job.getAppId();
      String dagPathStr = appPath + "/dag_1";
      File fs = new File(dagPathStr);
      Assert.assertFalse(fs.exists());
      fs = new File(appPath);
      Assert.assertTrue(fs.exists());
    }
  } finally {
    try {
      // Stop the session when an earlier failure skipped the regular stop above.
      if (!sessionStopAttempted) {
        tezSession.stop();
      }
    } finally {
      remoteFs.delete(stagingDirPath, true);
    }
  }
}
Example use of org.apache.tez.examples.OrderedWordCount in the Apache Tez project.
From class TestRecovery, method testOrderedWordCount.
/**
 * Runs OrderedWordCount with a recovery-service hook configured from the given shutdown
 * condition, verifies the job output, and then checks that the last recovery event
 * recorded for attempt 1 is the event described by the shutdown condition.
 *
 * @param shutdownCondition condition serialized into the configuration for the recovery hook
 * @param enableAutoParallelism value for the ShuffleVertexManager auto-parallel setting
 * @param generateSplitInClient whether to pass {@code -generateSplitInClient} to the job
 * @throws Exception if setup, the job run, or verification fails
 */
private void testOrderedWordCount(SimpleShutdownCondition shutdownCondition, boolean enableAutoParallelism, boolean generateSplitInClient) throws Exception {
  LOG.info("shutdownCondition:" + shutdownCondition.getEventType() + ", event=" + shutdownCondition.getEvent());

  // Locations on the remote file system used by this run.
  final String inputLocation = "/tmp/owc-input/";
  final String outputLocation = "/tmp/owc-output/";
  final Path inputPath = new Path(inputLocation);
  final Path outputPath = new Path(outputLocation);
  final Path stagingPath = new Path("/tmp/owc-staging-dir");

  remoteFs.mkdirs(inputPath);
  remoteFs.mkdirs(stagingPath);
  TestTezJobs.generateOrderedWordCountInput(inputPath, remoteFs);

  // Configuration: up to 4 AM attempts, with the test recovery service and hook installed.
  TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
  tezConf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 4);
  tezConf.set(TezConfiguration.TEZ_AM_RECOVERY_SERVICE_CLASS, RecoveryServiceWithEventHandlingHook.class.getName());
  tezConf.set(RecoveryServiceWithEventHandlingHook.AM_RECOVERY_SERVICE_HOOK_CLASS, SimpleRecoveryEventHook.class.getName());
  tezConf.set(SimpleRecoveryEventHook.SIMPLE_SHUTDOWN_CONDITION, shutdownCondition.serialize());
  tezConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, enableAutoParallelism);
  tezConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, false);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingPath.toString());
  tezConf.setBoolean(TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false);
  tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO;org.apache.tez=DEBUG");

  // The two invocations differ only in the optional leading flag.
  String[] jobArgs;
  if (generateSplitInClient) {
    jobArgs = new String[] { "-generateSplitInClient", inputLocation, outputLocation, "5" };
  } else {
    jobArgs = new String[] { inputLocation, outputLocation, "5" };
  }
  OrderedWordCount job = new OrderedWordCount();
  int exitCode = job.run(tezConf, jobArgs, null);
  Assert.assertTrue("OrderedWordCount failed", exitCode == 0);
  TestTezJobs.verifyOutput(outputPath, remoteFs);

  // The final event recorded for the first attempt must be the configured shutdown event.
  List<HistoryEvent> firstAttemptEvents = RecoveryParser.readRecoveryEvents(tezConf, job.getAppId(), 1);
  int lastIndex = firstAttemptEvents.size() - 1;
  HistoryEvent finalEvent = firstAttemptEvents.get(lastIndex);
  assertEquals(shutdownCondition.getEvent().getEventType(), finalEvent.getEventType());
  assertTrue(shutdownCondition.match(finalEvent));
}
Example use of org.apache.tez.examples.OrderedWordCount in the Apache Tez project.
From class TestRecovery, method testOrderedWordCountMultipleRoundRecoverying.
/**
 * Runs OrderedWordCount with the multiple-round recovery hook configured from the given
 * shutdown condition and verifies the job output.
 *
 * @param shutdownCondition multi-round condition serialized into the configuration for the hook
 * @param enableAutoParallelism value for the ShuffleVertexManager auto-parallel setting
 * @param generateSplitInClient whether to pass {@code -generateSplitInClient} to the job
 * @throws Exception if setup, the job run, or output verification fails
 */
private void testOrderedWordCountMultipleRoundRecoverying(RecoveryServiceWithEventHandlingHook.MultipleRoundShutdownCondition shutdownCondition, boolean enableAutoParallelism, boolean generateSplitInClient) throws Exception {
  // Log every round's condition up front.
  int roundCount = shutdownCondition.size();
  int round = 0;
  while (round < roundCount) {
    SimpleShutdownCondition roundCondition = shutdownCondition.getSimpleShutdownCondition(round);
    LOG.info("ShutdownCondition:" + roundCondition.getEventType() + ", event=" + roundCondition.getEvent());
    round++;
    roundCount = shutdownCondition.size();
  }

  // Locations on the remote file system used by this run.
  final String inputLocation = "/tmp/owc-input/";
  final String outputLocation = "/tmp/owc-output/";
  final Path inputPath = new Path(inputLocation);
  final Path outputPath = new Path(outputLocation);
  final Path stagingPath = new Path("/tmp/owc-staging-dir");

  remoteFs.mkdirs(inputPath);
  remoteFs.mkdirs(stagingPath);
  TestTezJobs.generateOrderedWordCountInput(inputPath, remoteFs);

  // Configuration: up to 4 AM attempts, with the multiple-round recovery hook installed.
  TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
  tezConf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 4);
  tezConf.set(TezConfiguration.TEZ_AM_RECOVERY_SERVICE_CLASS, RecoveryServiceWithEventHandlingHook.class.getName());
  tezConf.set(RecoveryServiceWithEventHandlingHook.AM_RECOVERY_SERVICE_HOOK_CLASS, RecoveryServiceWithEventHandlingHook.MultipleRoundRecoveryEventHook.class.getName());
  tezConf.set(RecoveryServiceWithEventHandlingHook.MultipleRoundRecoveryEventHook.MULTIPLE_ROUND_SHUTDOWN_CONDITION, shutdownCondition.serialize());
  tezConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, enableAutoParallelism);
  tezConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, false);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingPath.toString());
  tezConf.setBoolean(TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false);

  // The two invocations differ only in the optional leading flag.
  String[] jobArgs;
  if (generateSplitInClient) {
    jobArgs = new String[] { "-generateSplitInClient", inputLocation, outputLocation, "5" };
  } else {
    jobArgs = new String[] { inputLocation, outputLocation, "5" };
  }
  OrderedWordCount job = new OrderedWordCount();
  int exitCode = job.run(tezConf, jobArgs, null);
  Assert.assertTrue("OrderedWordCount failed", exitCode == 0);
  TestTezJobs.verifyOutput(outputPath, remoteFs);
}
Example use of org.apache.tez.examples.OrderedWordCount in the Apache Tez project.
From class TestTezJobs, method testOrderedWordCountDisableSplitGrouping.
@Test(timeout = 60000)
/**
 * Runs OrderedWordCount with the {@code -counter}, {@code -local} and
 * {@code -disableSplitGrouping} flags against directories under TEST_ROOT_DIR on the local
 * file system, and verifies the produced output.
 *
 * <p>Any previous output directory is deleted before the run; the staging directory is
 * deleted afterwards whether or not the run succeeds.
 *
 * @throws Exception if setup, the job run, or output verification fails
 */
public void testOrderedWordCountDisableSplitGrouping() throws Exception {
  String inputDirStr = TEST_ROOT_DIR + "/tmp/owc-input/";
  Path inputDir = new Path(inputDirStr);
  Path stagingDirPath = new Path(TEST_ROOT_DIR + "/tmp/owc-staging-dir");
  localFs.mkdirs(inputDir);
  localFs.mkdirs(stagingDirPath);
  generateOrderedWordCountInput(inputDir, localFs);
  String outputDirStr = TEST_ROOT_DIR + "/tmp/owc-output/";
  Path outputDir = new Path(outputDirStr);
  // Clear any output left behind by an earlier run.
  localFs.delete(outputDir, true);
  TezConfiguration tezConf = new TezConfiguration(conf);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
  try {
    OrderedWordCount job = new OrderedWordCount();
    // No TezClient is passed (third argument is null), so there is no session to stop here.
    Assert.assertTrue("OrderedWordCount failed", job.run(tezConf, new String[] { "-counter", "-local", "-disableSplitGrouping", inputDirStr, outputDirStr, "2" }, null) == 0);
    verifyOutput(outputDir, localFs);
  } finally {
    localFs.delete(stagingDirPath, true);
  }
}
Example use of org.apache.tez.examples.OrderedWordCount in the Apache Tez project.
From class TestTezJobs, method testOrderedWordCount.
@Test(timeout = 60000)
/**
 * Runs OrderedWordCount with the {@code -counter} flag against the remote file system using
 * the mrrTezCluster configuration, and verifies the produced output.
 *
 * <p>The staging directory is deleted afterwards whether or not the run succeeds.
 *
 * @throws Exception if setup, the job run, or output verification fails
 */
public void testOrderedWordCount() throws Exception {
  String inputDirStr = "/tmp/owc-input/";
  Path inputDir = new Path(inputDirStr);
  Path stagingDirPath = new Path("/tmp/owc-staging-dir");
  remoteFs.mkdirs(inputDir);
  remoteFs.mkdirs(stagingDirPath);
  generateOrderedWordCountInput(inputDir, remoteFs);
  String outputDirStr = "/tmp/owc-output/";
  Path outputDir = new Path(outputDirStr);
  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString());
  try {
    OrderedWordCount job = new OrderedWordCount();
    // No TezClient is passed (third argument is null), so there is no session to stop here.
    Assert.assertTrue("OrderedWordCount failed", job.run(tezConf, new String[] { "-counter", inputDirStr, outputDirStr, "2" }, null) == 0);
    verifyOutput(outputDir, remoteFs);
  } finally {
    remoteFs.delete(stagingDirPath, true);
  }
}
Aggregations