Search in sources :

Example 6 with HashJoinExample

Use of org.apache.tez.examples.HashJoinExample in the Apache Tez project.

From the class TestTezJobs, method testHashJoinExampleDisableSplitGrouping.

/**
 * Runs {@link HashJoinExample} with the {@code -disableSplitGrouping} flag against two small
 * inputs on the local file system and verifies that the output is exactly the set of terms
 * present in both inputs.
 *
 * <p>Input 1 holds {@code term0} .. {@code term19}; input 2 holds only the even-numbered terms,
 * so the expected join result is the ten even-numbered terms. The test requires the job to exit
 * with status 0 and to produce exactly one visible output file (names starting with {@code _}
 * or {@code .} are ignored).
 *
 * @throws Exception if file-system access or the example job fails
 */
@Test(timeout = 60000)
public void testHashJoinExampleDisableSplitGrouping() throws Exception {
    HashJoinExample hashJoinExample = new HashJoinExample();
    hashJoinExample.setConf(conf);

    Path stagingDirPath = new Path(TEST_ROOT_DIR + "/tmp/tez-staging-dir");
    Path inPath1 = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath1");
    Path inPath2 = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath2");
    Path outPath = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/outPath");
    // Remove any output left behind by an earlier run before the job writes to it again.
    localFs.delete(outPath, true);
    localFs.mkdirs(inPath1);
    localFs.mkdirs(inPath2);
    localFs.mkdirs(stagingDirPath);

    Set<String> expectedResult = new HashSet<String>();

    // The finally blocks guarantee both streams are released even if writing fails part-way.
    FSDataOutputStream out1 = localFs.create(new Path(inPath1, "file"));
    try {
        FSDataOutputStream out2 = localFs.create(new Path(inPath2, "file"));
        try {
            BufferedWriter writer1 = new BufferedWriter(new OutputStreamWriter(out1));
            BufferedWriter writer2 = new BufferedWriter(new OutputStreamWriter(out2));
            for (int i = 0; i < 20; i++) {
                String term = "term" + i;
                writer1.write(term);
                writer1.newLine();
                // Only even-numbered terms go to the second input, so they form the join result.
                if (i % 2 == 0) {
                    writer2.write(term);
                    writer2.newLine();
                    expectedResult.add(term);
                }
            }
            // Closing the writers flushes the buffered text into the underlying streams.
            writer1.close();
            writer2.close();
        } finally {
            out2.close();
        }
    } finally {
        out1.close();
    }

    // NOTE(review): the positional "1" is presumably the number of partitions -- confirm against
    // HashJoinExample's usage text.
    String[] args = new String[] { "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDirPath.toString(), "-counter", "-local", "-disableSplitGrouping", inPath1.toString(), inPath2.toString(), "1", outPath.toString() };
    assertEquals(0, hashJoinExample.run(args));

    // Ignore marker/hidden entries (names starting with '_' or '.') when listing the output.
    FileStatus[] statuses = localFs.listStatus(outPath, new PathFilter() {

        @Override
        public boolean accept(Path p) {
            String name = p.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    });
    assertEquals(1, statuses.length);

    FSDataInputStream inStream = localFs.open(statuses[0].getPath());
    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(inStream));
        String line;
        while ((line = reader.readLine()) != null) {
            // remove() returns false for a line that was never expected or was already seen.
            assertTrue("Unexpected or duplicate line in join output: " + line, expectedResult.remove(line));
        }
        reader.close();
    } finally {
        // Release the stream even when one of the assertions above fails.
        inStream.close();
    }
    // Anything still in the set was expected but never produced by the join.
    assertEquals("Terms missing from join output: " + expectedResult, 0, expectedResult.size());
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) InputStreamReader(java.io.InputStreamReader) BufferedWriter(java.io.BufferedWriter) HashJoinExample(org.apache.tez.examples.HashJoinExample) BufferedReader(java.io.BufferedReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) OutputStreamWriter(java.io.OutputStreamWriter) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

HashJoinExample (org.apache.tez.examples.HashJoinExample)6 Path (org.apache.hadoop.fs.Path)5 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)4 Test (org.junit.Test)4 BufferedReader (java.io.BufferedReader)3 BufferedWriter (java.io.BufferedWriter)3 InputStreamReader (java.io.InputStreamReader)3 OutputStreamWriter (java.io.OutputStreamWriter)3 HashSet (java.util.HashSet)3 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)3 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)3 FileStatus (org.apache.hadoop.fs.FileStatus)3 PathFilter (org.apache.hadoop.fs.PathFilter)3 JoinDataGen (org.apache.tez.examples.JoinDataGen)3 TezClient (org.apache.tez.client.TezClient)2 UserPayload (org.apache.tez.dag.api.UserPayload)1 HistoryEvent (org.apache.tez.dag.history.HistoryEvent)1 JoinValidate (org.apache.tez.examples.JoinValidate)1 ContainerLauncherDescriptor (org.apache.tez.serviceplugins.api.ContainerLauncherDescriptor)1 ServicePluginsDescriptor (org.apache.tez.serviceplugins.api.ServicePluginsDescriptor)1