Search in sources :

Example 1 with JoinDataGen

use of org.apache.tez.examples.JoinDataGen in project tez by apache.

the class TestExtServicesWithLocalMode method test1.

/**
 * Starts a session {@link TezClient} with the external push-service plugins registered,
 * generates join input data, runs {@link HashJoinExample}, and then calls
 * {@code runJoinValidate} with three combinations of execution contexts.
 *
 * @throws Exception if building the client or any of the runs fails
 */
@Test(timeout = 30000)
public void test1() throws Exception {
    // The same payload (built from confForJobs) is handed to every plugin descriptor.
    UserPayload pluginPayload = TezUtils.createUserPayloadFromConf(confForJobs);

    TaskSchedulerDescriptor schedulerDescriptor = TaskSchedulerDescriptor
        .create(EXT_PUSH_ENTITY_NAME, TezTestServiceTaskSchedulerService.class.getName())
        .setUserPayload(pluginPayload);
    ContainerLauncherDescriptor launcherDescriptor = ContainerLauncherDescriptor
        .create(EXT_PUSH_ENTITY_NAME, TezTestServiceNoOpContainerLauncher.class.getName())
        .setUserPayload(pluginPayload);
    TaskCommunicatorDescriptor communicatorDescriptor = TaskCommunicatorDescriptor
        .create(EXT_PUSH_ENTITY_NAME, TezTestServiceTaskCommunicatorImpl.class.getName())
        .setUserPayload(pluginPayload);

    ServicePluginsDescriptor plugins = ServicePluginsDescriptor.create(
        true,
        false,
        new TaskSchedulerDescriptor[] { schedulerDescriptor },
        new ContainerLauncherDescriptor[] { launcherDescriptor },
        new TaskCommunicatorDescriptor[] { communicatorDescriptor });

    TezConfiguration jobConf = new TezConfiguration(confForJobs);
    TezClient client = TezClient.newBuilder("test1", jobConf)
        .setIsSession(true)
        .setServicePluginDescriptor(plugins)
        .build();

    try {
        client.start();

        Path lhsInput = new Path(SRC_DATA_DIR, "inPath1");
        Path rhsInput = new Path(SRC_DATA_DIR, "inPath2");
        Path expectedJoinResult = new Path(SRC_DATA_DIR, "expectedOutputPath");
        String lhsInputName = lhsInput.toString();
        String rhsInputName = rhsInput.toString();

        // Step 1: produce the two inputs plus the expected join result.
        JoinDataGen generator = new JoinDataGen();
        String[] generatorArgs = {
            lhsInputName, "1048576",
            rhsInputName, "524288",
            expectedJoinResult.toString(),
            "2"
        };
        int generatorExit = generator.run(jobConf, generatorArgs, client);
        assertEquals(0, generatorExit);

        // Step 2: run the hash join over the generated inputs.
        Path actualJoinResult = new Path(SRC_DATA_DIR, "outPath");
        HashJoinExample hashJoin = new HashJoinExample();
        String[] hashJoinArgs = { lhsInputName, rhsInputName, "2", actualJoinResult.toString() };
        int hashJoinExit = hashJoin.run(jobConf, hashJoinArgs, client);
        assertEquals(0, hashJoinExit);
        LOG.info("Completed generating Data - Expected Hash Result and Actual Join Result");

        // Nothing has been submitted to the external test service at this point.
        assertEquals(0, tezTestServiceCluster.getNumSubmissions());

        // NOTE(review): the int argument to runJoinValidate is presumably the expected
        // submission count on the external service - confirm against runJoinValidate.

        // ext can consume from ext.
        runJoinValidate(
            client,
            "allInExt",
            7,
            EXECUTION_CONTEXT_EXT_SERVICE_PUSH,
            EXECUTION_CONTEXT_EXT_SERVICE_PUSH,
            EXECUTION_CONTEXT_EXT_SERVICE_PUSH);
        LOG.info("Completed allInExt");

        // uber can consume from uber.
        runJoinValidate(client, "noneInExt", 0, null, null, null);
        LOG.info("Completed noneInExt");

        // uber can consume from ext
        runJoinValidate(client, "lhsInExt", 2, EXECUTION_CONTEXT_EXT_SERVICE_PUSH, null, null);
        LOG.info("Completed lhsInExt");

        // The "ext consumes from uber" combination is intentionally not exercised: in this
        // mode there's no shuffle handler working, and the local data transfer semantics
        // may not match.
    } finally {
        client.stop();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) UserPayload(org.apache.tez.dag.api.UserPayload) TaskSchedulerDescriptor(org.apache.tez.serviceplugins.api.TaskSchedulerDescriptor) ServicePluginsDescriptor(org.apache.tez.serviceplugins.api.ServicePluginsDescriptor) TaskCommunicatorDescriptor(org.apache.tez.serviceplugins.api.TaskCommunicatorDescriptor) TezClient(org.apache.tez.client.TezClient) ContainerLauncherDescriptor(org.apache.tez.serviceplugins.api.ContainerLauncherDescriptor) JoinDataGen(org.apache.tez.examples.JoinDataGen) HashJoinExample(org.apache.tez.examples.HashJoinExample) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 2 with JoinDataGen

use of org.apache.tez.examples.JoinDataGen in project tez by apache.

the class ExternalTezServiceTestHelper method setupHashJoinData.

/**
 * Creates the source data directory, generates the join inputs and the expected result with
 * {@link JoinDataGen}, and then runs {@link HashJoinExample} to produce the actual result.
 * Both runs go through {@code sharedTezClient} and must exit with status 0.
 *
 * @param srcDataDir directory created on {@code remoteFs} before any data is generated
 * @param dataPath1 first join input, written by the generator and read by the join
 * @param dataPath2 second join input, written by the generator and read by the join
 * @param expectedResultPath location passed to the generator for the expected join result
 * @param outputPath location passed to the join for its output
 * @throws Exception if directory creation or either run fails
 */
public void setupHashJoinData(Path srcDataDir, Path dataPath1, Path dataPath2, Path expectedResultPath, Path outputPath) throws Exception {
    remoteFs.mkdirs(srcDataDir);

    TezConfiguration jobConf = new TezConfiguration(confForJobs);
    String firstInput = dataPath1.toString();
    String secondInput = dataPath2.toString();

    // Generate join data - with 2 tasks.
    JoinDataGen generator = new JoinDataGen();
    String[] generatorArgs = {
        firstInput, "1048576",
        secondInput, "524288",
        expectedResultPath.toString(),
        "2"
    };
    int generatorExit = generator.run(jobConf, generatorArgs, sharedTezClient);
    assertEquals(0, generatorExit);

    // Run the actual join - with 2 reducers
    HashJoinExample hashJoin = new HashJoinExample();
    String[] hashJoinArgs = { firstInput, secondInput, "2", outputPath.toString() };
    int hashJoinExit = hashJoin.run(jobConf, hashJoinArgs, sharedTezClient);
    assertEquals(0, hashJoinExit);

    LOG.info("Completed generating Data - Expected Hash Result and Actual Join Result");
}
Also used : JoinDataGen(org.apache.tez.examples.JoinDataGen) HashJoinExample(org.apache.tez.examples.HashJoinExample) TezConfiguration(org.apache.tez.dag.api.TezConfiguration)

Example 3 with JoinDataGen

use of org.apache.tez.examples.JoinDataGen in project tez by apache.

the class TestTezJobs method testSortMergeJoinExamplePipeline.

/**
 * Runs the complete {@link SortMergeJoinExample} pipeline in a single session:<br>
 * {@link JoinDataGen} -> {@link SortMergeJoinExample} -> {@link JoinValidate}.
 * Every stage is expected to exit with status 0.
 *
 * @throws Exception if directory setup, session start-up, or any stage fails
 */
@Test(timeout = 120000)
public void testSortMergeJoinExamplePipeline() throws Exception {
    Path workDir = new Path("/tmp/testSortMergeExample");
    Path stagingDir = new Path("/tmp/tez-staging-dir");
    remoteFs.mkdirs(stagingDir);
    remoteFs.mkdirs(workDir);

    Path lhsInput = new Path(workDir, "inPath1");
    Path rhsInput = new Path(workDir, "inPath2");
    Path expectedResult = new Path(workDir, "expectedOutputPath");
    Path actualResult = new Path(workDir, "outPath");

    TezConfiguration conf = new TezConfiguration(mrrTezCluster.getConfig());
    conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

    // Stays null if creation fails, so the finally block only stops a client that exists.
    TezClient session = null;
    try {
        session = TezClient.create("SortMergeExampleSession", conf, true);
        session.start();

        String lhsInputName = lhsInput.toString();
        String rhsInputName = rhsInput.toString();

        // Stage 1: generate both inputs and the expected join output.
        JoinDataGen generator = new JoinDataGen();
        String[] generatorArgs = {
            lhsInputName, "1048576",
            rhsInputName, "524288",
            expectedResult.toString(),
            "2"
        };
        int generatorExit = generator.run(conf, generatorArgs, session);
        assertEquals(0, generatorExit);

        // Stage 2: run the sort-merge join over the generated inputs.
        SortMergeJoinExample sortMergeJoin = new SortMergeJoinExample();
        String[] joinArgs = { lhsInputName, rhsInputName, "2", actualResult.toString() };
        int joinExit = sortMergeJoin.run(conf, joinArgs, session);
        assertEquals(0, joinExit);

        // Stage 3: compare the join output against the expected output.
        JoinValidate validator = new JoinValidate();
        String[] validatorArgs = { expectedResult.toString(), actualResult.toString(), "3" };
        int validatorExit = validator.run(conf, validatorArgs, session);
        assertEquals(0, validatorExit);
    } finally {
        if (session != null) {
            session.stop();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) JoinDataGen(org.apache.tez.examples.JoinDataGen) SortMergeJoinExample(org.apache.tez.examples.SortMergeJoinExample) JoinValidate(org.apache.tez.examples.JoinValidate) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) Test(org.junit.Test)

Example 4 with JoinDataGen

use of org.apache.tez.examples.JoinDataGen in project tez by apache.

the class TestTezJobs method testHashJoinExamplePipeline.

/**
 * Runs the complete {@link HashJoinExample} pipeline in a single session:<br>
 * {@link JoinDataGen} -> {@link HashJoinExample} -> {@link JoinValidate}.
 * The generator and the validator are both invoked with a leading {@code -counter}
 * argument, and every stage is expected to exit with status 0.
 *
 * @throws Exception if directory setup, session start-up, or any stage fails
 */
@Test(timeout = 120000)
public void testHashJoinExamplePipeline() throws Exception {
    Path workDir = new Path("/tmp/testHashJoinExample");
    Path stagingDir = new Path("/tmp/tez-staging-dir");
    remoteFs.mkdirs(stagingDir);
    remoteFs.mkdirs(workDir);

    Path lhsInput = new Path(workDir, "inPath1");
    Path rhsInput = new Path(workDir, "inPath2");
    Path expectedResult = new Path(workDir, "expectedOutputPath");
    Path actualResult = new Path(workDir, "outPath");

    TezConfiguration conf = new TezConfiguration(mrrTezCluster.getConfig());
    conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

    // Stays null if creation fails, so the finally block only stops a client that exists.
    TezClient session = null;
    try {
        session = TezClient.create("HashJoinExampleSession", conf, true);
        session.start();

        String lhsInputName = lhsInput.toString();
        String rhsInputName = rhsInput.toString();

        // Stage 1: generate both inputs and the expected join output.
        JoinDataGen generator = new JoinDataGen();
        String[] generatorArgs = {
            "-counter",
            lhsInputName, "1048576",
            rhsInputName, "524288",
            expectedResult.toString(),
            "2"
        };
        int generatorExit = generator.run(conf, generatorArgs, session);
        assertEquals(0, generatorExit);

        // Stage 2: run the hash join over the generated inputs.
        HashJoinExample hashJoin = new HashJoinExample();
        String[] joinArgs = { lhsInputName, rhsInputName, "2", actualResult.toString() };
        int joinExit = hashJoin.run(conf, joinArgs, session);
        assertEquals(0, joinExit);

        // Stage 3: compare the join output against the expected output.
        JoinValidate validator = new JoinValidate();
        String[] validatorArgs = {
            "-counter",
            expectedResult.toString(),
            actualResult.toString(),
            "3"
        };
        int validatorExit = validator.run(conf, validatorArgs, session);
        assertEquals(0, validatorExit);
    } finally {
        if (session != null) {
            session.stop();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) JoinDataGen(org.apache.tez.examples.JoinDataGen) HashJoinExample(org.apache.tez.examples.HashJoinExample) JoinValidate(org.apache.tez.examples.JoinValidate) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezClient(org.apache.tez.client.TezClient) Test(org.junit.Test)

Aggregations

TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 4, JoinDataGen (org.apache.tez.examples.JoinDataGen): 4, Path (org.apache.hadoop.fs.Path): 3, TezClient (org.apache.tez.client.TezClient): 3, HashJoinExample (org.apache.tez.examples.HashJoinExample): 3, Test (org.junit.Test): 3, JoinValidate (org.apache.tez.examples.JoinValidate): 2, UserPayload (org.apache.tez.dag.api.UserPayload): 1, SortMergeJoinExample (org.apache.tez.examples.SortMergeJoinExample): 1, ContainerLauncherDescriptor (org.apache.tez.serviceplugins.api.ContainerLauncherDescriptor): 1, ServicePluginsDescriptor (org.apache.tez.serviceplugins.api.ServicePluginsDescriptor): 1, TaskCommunicatorDescriptor (org.apache.tez.serviceplugins.api.TaskCommunicatorDescriptor): 1, TaskSchedulerDescriptor (org.apache.tez.serviceplugins.api.TaskSchedulerDescriptor): 1