use of org.apache.tez.examples.HashJoinExample in project tez by apache.
the class TestExtServicesWithLocalMode method test1.
/**
 * Starts a session client wired to the test scheduler, container-launcher and
 * task-communicator plugins, generates join data, runs {@link HashJoinExample}
 * once, and then validates the join under several execution-context
 * combinations via {@code runJoinValidate}.
 *
 * @throws Exception if the client, data generation, the join, or a validation run fails
 */
@Test(timeout = 30000)
public void test1() throws Exception {
  // A single payload built from the shared job configuration is handed to every plugin.
  UserPayload pluginPayload = TezUtils.createUserPayloadFromConf(confForJobs);

  TaskSchedulerDescriptor schedulerDescriptor = TaskSchedulerDescriptor
      .create(EXT_PUSH_ENTITY_NAME, TezTestServiceTaskSchedulerService.class.getName())
      .setUserPayload(pluginPayload);
  ContainerLauncherDescriptor launcherDescriptor = ContainerLauncherDescriptor
      .create(EXT_PUSH_ENTITY_NAME, TezTestServiceNoOpContainerLauncher.class.getName())
      .setUserPayload(pluginPayload);
  TaskCommunicatorDescriptor communicatorDescriptor = TaskCommunicatorDescriptor
      .create(EXT_PUSH_ENTITY_NAME, TezTestServiceTaskCommunicatorImpl.class.getName())
      .setUserPayload(pluginPayload);

  TaskSchedulerDescriptor[] schedulers = { schedulerDescriptor };
  ContainerLauncherDescriptor[] launchers = { launcherDescriptor };
  TaskCommunicatorDescriptor[] communicators = { communicatorDescriptor };
  ServicePluginsDescriptor plugins =
      ServicePluginsDescriptor.create(true, false, schedulers, launchers, communicators);

  TezConfiguration jobConf = new TezConfiguration(confForJobs);
  TezClient client = TezClient.newBuilder("test1", jobConf)
      .setIsSession(true)
      .setServicePluginDescriptor(plugins)
      .build();

  try {
    client.start();

    Path lhsInput = new Path(SRC_DATA_DIR, "inPath1");
    Path rhsInput = new Path(SRC_DATA_DIR, "inPath2");
    Path expectedJoinOutput = new Path(SRC_DATA_DIR, "expectedOutputPath");

    // Produce both inputs plus the expected join result.
    String[] genArgs = {
        lhsInput.toString(), "1048576",
        rhsInput.toString(), "524288",
        expectedJoinOutput.toString(), "2" };
    JoinDataGen generator = new JoinDataGen();
    assertEquals(0, generator.run(jobConf, genArgs, client));

    // Produce the actual join result.
    Path actualJoinOutput = new Path(SRC_DATA_DIR, "outPath");
    String[] joinArgs = {
        lhsInput.toString(), rhsInput.toString(), "2", actualJoinOutput.toString() };
    HashJoinExample join = new HashJoinExample();
    assertEquals(0, join.run(jobConf, joinArgs, client));
    LOG.info("Completed generating Data - Expected Hash Result and Actual Join Result");

    // Nothing has been submitted to the external test service up to this point.
    assertEquals(0, tezTestServiceCluster.getNumSubmissions());

    // Every vertex in the external service: ext can consume from ext.
    runJoinValidate(client, "allInExt", 7,
        EXECUTION_CONTEXT_EXT_SERVICE_PUSH,
        EXECUTION_CONTEXT_EXT_SERVICE_PUSH,
        EXECUTION_CONTEXT_EXT_SERVICE_PUSH);
    LOG.info("Completed allInExt");

    // No vertex in the external service: uber can consume from uber.
    runJoinValidate(client, "noneInExt", 0, null, null, null);
    LOG.info("Completed noneInExt");

    // Only the first context is external: uber can consume from ext.
    runJoinValidate(client, "lhsInExt", 2, EXECUTION_CONTEXT_EXT_SERVICE_PUSH, null, null);
    LOG.info("Completed lhsInExt");

    // The reverse direction (ext consuming from uber) is deliberately not exercised:
    // no shuffle handler is working in this mode, and the local data transfer
    // semantics may not match.
  } finally {
    client.stop();
  }
}
use of org.apache.tez.examples.HashJoinExample in project tez by apache.
the class ExternalTezServiceTestHelper method setupHashJoinData.
/**
 * Creates {@code srcDataDir} and populates it by running {@link JoinDataGen}
 * followed by {@link HashJoinExample} on the shared Tez client. Both runs are
 * asserted to return exit code 0.
 *
 * @param srcDataDir         directory created on the remote filesystem before any job runs
 * @param dataPath1          first join input, written by the data generator
 * @param dataPath2          second join input, written by the data generator
 * @param expectedResultPath where the data generator writes the expected join result
 * @param outputPath         where the hash join writes its actual result
 * @throws Exception if directory creation or either job fails
 */
public void setupHashJoinData(Path srcDataDir, Path dataPath1, Path dataPath2, Path expectedResultPath, Path outputPath) throws Exception {
  remoteFs.mkdirs(srcDataDir);
  TezConfiguration jobConf = new TezConfiguration(confForJobs);

  // Step 1: data generation, using 2 tasks (last argument).
  // The two numeric arguments are presumably the sizes of the generated inputs - TODO confirm.
  String[] genArgs = {
      dataPath1.toString(), "1048576",
      dataPath2.toString(), "524288",
      expectedResultPath.toString(), "2" };
  JoinDataGen generator = new JoinDataGen();
  assertEquals(0, generator.run(jobConf, genArgs, sharedTezClient));

  // Step 2: the join itself, using 2 reducers.
  String[] joinArgs = {
      dataPath1.toString(), dataPath2.toString(), "2", outputPath.toString() };
  HashJoinExample join = new HashJoinExample();
  assertEquals(0, join.run(jobConf, joinArgs, sharedTezClient));

  LOG.info("Completed generating Data - Expected Hash Result and Actual Join Result");
}
use of org.apache.tez.examples.HashJoinExample in project tez by apache.
the class TestRecovery method testHashJoinExample.
/**
 * Runs {@link HashJoinExample} with the recovery-service test hook installed and
 * verifies both the join output and the recovery log of the first AM attempt.
 *
 * <p>Input 1 receives {@code term0..term19}; input 2 receives only the
 * even-numbered terms, so the expected join result is exactly those even terms.
 * After the job finishes, the last recovery event recorded for attempt 1 must
 * match {@code shutdownCondition}.
 *
 * @param shutdownCondition     condition serialized into the configuration for the
 *                              recovery event hook; the last event of attempt 1 is
 *                              asserted to match it
 * @param enableAutoParallelism value for the shuffle vertex manager auto-parallel setting
 * @param generateSplitInClient whether to pass {@code -generateSplitInClient} to the example
 * @throws Exception if filesystem access, the job, or reading recovery events fails
 */
private void testHashJoinExample(SimpleShutdownCondition shutdownCondition, boolean enableAutoParallelism, boolean generateSplitInClient) throws Exception {
  HashJoinExample hashJoinExample = new HashJoinExample();
  TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
  tezConf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 4);
  tezConf.set(TezConfiguration.TEZ_AM_RECOVERY_SERVICE_CLASS, RecoveryServiceWithEventHandlingHook.class.getName());
  tezConf.set(RecoveryServiceWithEventHandlingHook.AM_RECOVERY_SERVICE_HOOK_CLASS, SimpleRecoveryEventHook.class.getName());
  tezConf.set(SimpleRecoveryEventHook.SIMPLE_SHUTDOWN_CONDITION, shutdownCondition.serialize());
  tezConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, enableAutoParallelism);
  tezConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, false);
  tezConf.setBoolean(TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false);
  tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO;org.apache.tez=DEBUG");
  hashJoinExample.setConf(tezConf);

  Path stagingDirPath = new Path("/tmp/tez-staging-dir");
  Path inPath1 = new Path("/tmp/hashJoin/inPath1");
  Path inPath2 = new Path("/tmp/hashJoin/inPath2");
  Path outPath = new Path("/tmp/hashJoin/outPath");
  // Remove any output left behind by an earlier invocation before running again.
  remoteFs.delete(outPath, true);
  remoteFs.mkdirs(inPath1);
  remoteFs.mkdirs(inPath2);
  remoteFs.mkdirs(stagingDirPath);

  Set<String> expectedResult = new HashSet<String>();
  // try-with-resources guarantees all four handles are released even if a write fails.
  try (FSDataOutputStream out1 = remoteFs.create(new Path(inPath1, "file"));
       FSDataOutputStream out2 = remoteFs.create(new Path(inPath2, "file"));
       BufferedWriter writer1 = new BufferedWriter(new OutputStreamWriter(out1));
       BufferedWriter writer2 = new BufferedWriter(new OutputStreamWriter(out2))) {
    for (int i = 0; i < 20; i++) {
      String term = "term" + i;
      writer1.write(term);
      writer1.newLine();
      // Only even-numbered terms appear in both inputs, so only they survive the join.
      if (i % 2 == 0) {
        writer2.write(term);
        writer2.newLine();
        expectedResult.add(term);
      }
    }
  }

  String stagingDirArg = "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString();
  String[] args;
  if (generateSplitInClient) {
    args = new String[] { stagingDirArg, "-generateSplitInClient", inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
  } else {
    args = new String[] { stagingDirArg, inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
  }
  assertEquals(0, hashJoinExample.run(args));

  // Ignore entries whose names start with '_' or '.'; exactly one data file must remain.
  FileStatus[] statuses = remoteFs.listStatus(outPath, new PathFilter() {
    @Override
    public boolean accept(Path p) {
      String name = p.getName();
      return !name.startsWith("_") && !name.startsWith(".");
    }
  });
  assertEquals(1, statuses.length);

  // The assertion inside the loop may throw; try-with-resources still closes the streams.
  try (FSDataInputStream inStream = remoteFs.open(statuses[0].getPath());
       BufferedReader reader = new BufferedReader(new InputStreamReader(inStream))) {
    String line;
    while ((line = reader.readLine()) != null) {
      // Every output line must be an expected term that has not been seen yet.
      assertTrue(expectedResult.remove(line));
    }
  }
  // Nothing expected may be missing from the output.
  assertEquals(0, expectedResult.size());

  List<HistoryEvent> historyEventsOfAttempt1 = RecoveryParser.readRecoveryEvents(tezConf, hashJoinExample.getAppId(), 1);
  // Fail with a clear message rather than an IndexOutOfBoundsException on an empty log.
  assertTrue("No recovery events were recorded for attempt 1", !historyEventsOfAttempt1.isEmpty());
  HistoryEvent lastEvent = historyEventsOfAttempt1.get(historyEventsOfAttempt1.size() - 1);
  assertEquals(shutdownCondition.getEvent().getEventType(), lastEvent.getEventType());
  assertTrue(shutdownCondition.match(lastEvent));
}
use of org.apache.tez.examples.HashJoinExample in project tez by apache.
the class TestTezJobs method testHashJoinExample.
/**
 * Runs {@link HashJoinExample} on two small generated inputs and checks its output.
 *
 * <p>Input 1 receives {@code term0..term19}; input 2 receives only the
 * even-numbered terms, so the single output file must contain exactly those
 * even terms, each once.
 *
 * @throws Exception if filesystem access or the job fails
 */
@Test(timeout = 60000)
public void testHashJoinExample() throws Exception {
  HashJoinExample hashJoinExample = new HashJoinExample();
  hashJoinExample.setConf(mrrTezCluster.getConfig());

  Path stagingDirPath = new Path("/tmp/tez-staging-dir");
  Path inPath1 = new Path("/tmp/hashJoin/inPath1");
  Path inPath2 = new Path("/tmp/hashJoin/inPath2");
  Path outPath = new Path("/tmp/hashJoin/outPath");
  remoteFs.mkdirs(inPath1);
  remoteFs.mkdirs(inPath2);
  remoteFs.mkdirs(stagingDirPath);

  Set<String> expectedResult = new HashSet<String>();
  // try-with-resources guarantees all four handles are released even if a write fails.
  try (FSDataOutputStream out1 = remoteFs.create(new Path(inPath1, "file"));
       FSDataOutputStream out2 = remoteFs.create(new Path(inPath2, "file"));
       BufferedWriter writer1 = new BufferedWriter(new OutputStreamWriter(out1));
       BufferedWriter writer2 = new BufferedWriter(new OutputStreamWriter(out2))) {
    for (int i = 0; i < 20; i++) {
      String term = "term" + i;
      writer1.write(term);
      writer1.newLine();
      // Only even-numbered terms appear in both inputs, so only they survive the join.
      if (i % 2 == 0) {
        writer2.write(term);
        writer2.newLine();
        expectedResult.add(term);
      }
    }
  }

  String[] args = new String[] { "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString(), "-counter", inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
  assertEquals(0, hashJoinExample.run(args));

  // Ignore entries whose names start with '_' or '.'; exactly one data file must remain.
  FileStatus[] statuses = remoteFs.listStatus(outPath, new PathFilter() {
    @Override
    public boolean accept(Path p) {
      String name = p.getName();
      return !name.startsWith("_") && !name.startsWith(".");
    }
  });
  assertEquals(1, statuses.length);

  // The assertion inside the loop may throw; try-with-resources still closes the streams.
  try (FSDataInputStream inStream = remoteFs.open(statuses[0].getPath());
       BufferedReader reader = new BufferedReader(new InputStreamReader(inStream))) {
    String line;
    while ((line = reader.readLine()) != null) {
      // Every output line must be an expected term that has not been seen yet.
      assertTrue(expectedResult.remove(line));
    }
  }
  // Nothing expected may be missing from the output.
  assertEquals(0, expectedResult.size());
}
use of org.apache.tez.examples.HashJoinExample in project tez by apache.
the class TestTezJobs method testHashJoinExamplePipeline.
/**
 * Runs the complete {@link HashJoinExample} pipeline inside one Tez session:<br>
 * {@link JoinDataGen} &rarr; {@link HashJoinExample} &rarr; {@link JoinValidate}.
 * Each stage is asserted to return exit code 0.
 *
 * @throws Exception if a filesystem operation, the session, or any stage fails
 */
@Test(timeout = 120000)
public void testHashJoinExamplePipeline() throws Exception {
  Path workDir = new Path("/tmp/testHashJoinExample");
  Path stagingDir = new Path("/tmp/tez-staging-dir");
  remoteFs.mkdirs(stagingDir);
  remoteFs.mkdirs(workDir);

  Path lhsInput = new Path(workDir, "inPath1");
  Path rhsInput = new Path(workDir, "inPath2");
  Path expectedJoinOutput = new Path(workDir, "expectedOutputPath");
  Path actualJoinOutput = new Path(workDir, "outPath");

  TezConfiguration sessionConf = new TezConfiguration(mrrTezCluster.getConfig());
  sessionConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

  // If creation itself throws there is no client to stop, so it can sit outside the try.
  TezClient session = TezClient.create("HashJoinExampleSession", sessionConf, true);
  try {
    session.start();

    // Stage 1: generate both inputs and the expected join result.
    String[] genArgs = {
        "-counter",
        lhsInput.toString(), "1048576",
        rhsInput.toString(), "524288",
        expectedJoinOutput.toString(), "2" };
    assertEquals(0, new JoinDataGen().run(sessionConf, genArgs, session));

    // Stage 2: run the hash join over the generated inputs.
    String[] joinArgs = {
        lhsInput.toString(), rhsInput.toString(), "2", actualJoinOutput.toString() };
    assertEquals(0, new HashJoinExample().run(sessionConf, joinArgs, session));

    // Stage 3: compare the actual join output with the expected one.
    String[] checkArgs = {
        "-counter", expectedJoinOutput.toString(), actualJoinOutput.toString(), "3" };
    assertEquals(0, new JoinValidate().run(sessionConf, checkArgs, session));
  } finally {
    session.stop();
  }
}
Aggregations