Search in sources :

Example 1 with ProgramManager

Use of co.cask.cdap.test.ProgramManager in the cdap project by caskdata.

From the class PartitionConsumingTestRun, method testWordCountOnFileSet.

/**
 * Deploys {@code AppWithPartitionConsumers}, feeds partitions to it through the "DatasetService"
 * service and verifies the word counts and output partitions after each of three program runs.
 *
 * <p>The runs are: one input partition (LINE1), then two new partitions (LINE2 and LINE3), then
 * no new partitions at all. The last run must leave the counts unchanged.
 *
 * @param runProgram invoked once per run with the deployed application's manager; the returned
 *     {@link ProgramManager} is used to wait for the run to complete (presumably the function
 *     also starts the program; confirm against callers)
 * @param produceOutputPartitionEachRun {@code true} if every run is expected to add an output
 *     partition even without new input (3 partitions expected in total), {@code false} if a run
 *     without new input adds none (2 partitions expected)
 * @throws Exception if any step of the test fails with an error
 */
private void testWordCountOnFileSet(Function<ApplicationManager, ProgramManager> runProgram, boolean produceOutputPartitionEachRun) throws Exception {
    ApplicationManager applicationManager = deployApplication(AppWithPartitionConsumers.class);
    ServiceManager serviceManager = applicationManager.getServiceManager("DatasetService").start();
    serviceManager.waitForStatus(true);
    URL serviceURL = serviceManager.getServiceURL();

    // write a file to the file set using the service and run the program on that one partition
    createPartition(serviceURL, LINE1, "1");
    ProgramManager programManager = runProgram.apply(applicationManager);
    programManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // Long.valueOf replaces the deprecated Long(long) constructor; the compared values are equal
    Assert.assertEquals(Long.valueOf(2L), getCount(serviceURL, "a"));
    Assert.assertEquals(Long.valueOf(1L), getCount(serviceURL, "b"));
    Assert.assertEquals(Long.valueOf(0L), getCount(serviceURL, "c"));

    // create two additional partitions
    createPartition(serviceURL, LINE2, "2");
    createPartition(serviceURL, LINE3, "3");
    // running the program now processes these two new partitions (LINE2 and LINE3) and updates the counts
    // dataset accordingly; wait until two completed runs are recorded in total
    programManager = runProgram.apply(applicationManager);
    programManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
    Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "a"));
    Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "b"));
    Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "c"));

    // running the program without adding new partitions does not affect the counts dataset
    programManager = runProgram.apply(applicationManager);
    programManager.waitForRuns(ProgramRunStatus.COMPLETED, 3, 5, TimeUnit.MINUTES);
    Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "a"));
    Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "b"));
    Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "c"));

    DataSetManager<PartitionedFileSet> outputLines = getDataset("outputLines");
    Set<PartitionDetail> partitions = outputLines.get().getPartitions(PartitionFilter.ALWAYS_MATCH);
    // each of the three MapReduce runs produces an output partition (even if there's no input data)
    // however, Worker run doesn't produce a new output partition if there's no new input partition
    Assert.assertEquals(produceOutputPartitionEachRun ? 3 : 2, partitions.size());

    // we only store the counts to the "outputLines" dataset
    List<String> expectedCounts = Lists.newArrayList("1", "1", "2", "2", "3");
    List<String> outputRecords = getDataFromExplore("outputLines");
    // sort the records before comparing them to the (already sorted) expected list
    Collections.sort(outputRecords);
    Assert.assertEquals(expectedCounts, outputRecords);
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), ServiceManager (co.cask.cdap.test.ServiceManager), PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet), PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail), URL (java.net.URL), ProgramManager (co.cask.cdap.test.ProgramManager)

Aggregations

PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail): 1, PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 1, ApplicationManager (co.cask.cdap.test.ApplicationManager): 1, ProgramManager (co.cask.cdap.test.ProgramManager): 1, ServiceManager (co.cask.cdap.test.ServiceManager): 1, URL (java.net.URL): 1