Use of co.cask.cdap.test.ProgramManager in project cdap by caskdata.
The method testWordCountOnFileSet of the class PartitionConsumingTestRun.
/**
 * Deploys {@link AppWithPartitionConsumers}, creates input partitions through the "DatasetService"
 * service, and checks the word counts after each run of the program started by {@code runProgram}.
 *
 * <p>The program is started three times: once with one partition (LINE1), once after two more
 * partitions (LINE2, LINE3) have been added, and once with no new partitions. Finally the contents
 * of the "outputLines" dataset are verified.
 *
 * @param runProgram starts the program under test on the given {@link ApplicationManager} and
 *                   returns the {@link ProgramManager} used to wait for its completion
 * @param produceOutputPartitionEachRun {@code true} if three output partitions are expected in
 *                   "outputLines" (one per run), {@code false} if only two are expected
 * @throws Exception if any of the deployment, service, program or dataset operations fails
 */
private void testWordCountOnFileSet(Function<ApplicationManager, ProgramManager> runProgram, boolean produceOutputPartitionEachRun) throws Exception {
  ApplicationManager applicationManager = deployApplication(AppWithPartitionConsumers.class);
  ServiceManager serviceManager = applicationManager.getServiceManager("DatasetService").start();
  serviceManager.waitForStatus(true);
  URL serviceURL = serviceManager.getServiceURL();

  // write a file to the file set using the service and run the program on that one partition
  createPartition(serviceURL, LINE1, "1");
  ProgramManager programManager = runProgram.apply(applicationManager);
  programManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  // Long.valueOf replaces the deprecated Long constructor; the argument stays a Long so the
  // same assertEquals(Object, Object) overload is selected
  Assert.assertEquals(Long.valueOf(2L), getCount(serviceURL, "a"));
  Assert.assertEquals(Long.valueOf(1L), getCount(serviceURL, "b"));
  Assert.assertEquals(Long.valueOf(0L), getCount(serviceURL, "c"));

  // create two additional partitions
  createPartition(serviceURL, LINE2, "2");
  createPartition(serviceURL, LINE3, "3");

  // running the program now processes these two new partitions (LINE2 and LINE3) and updates the
  // counts dataset accordingly; wait until two runs in total have completed
  programManager = runProgram.apply(applicationManager);
  programManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
  Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "a"));
  Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "b"));
  Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "c"));

  // running the program without adding new partitions does not affect the counts dataset
  programManager = runProgram.apply(applicationManager);
  programManager.waitForRuns(ProgramRunStatus.COMPLETED, 3, 5, TimeUnit.MINUTES);
  Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "a"));
  Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "b"));
  Assert.assertEquals(Long.valueOf(3L), getCount(serviceURL, "c"));

  DataSetManager<PartitionedFileSet> outputLines = getDataset("outputLines");
  Set<PartitionDetail> partitions = outputLines.get().getPartitions(PartitionFilter.ALWAYS_MATCH);
  // each of the three MapReduce runs produces an output partition (even if there's no input data)
  // however, Worker run doesn't produce a new output partition if there's no new input partition
  Assert.assertEquals(produceOutputPartitionEachRun ? 3 : 2, partitions.size());

  // we only store the counts to the "outputLines" dataset; sort the records so the comparison
  // does not depend on the order in which they are returned
  List<String> expectedCounts = Lists.newArrayList("1", "1", "2", "2", "3");
  List<String> outputRecords = getDataFromExplore("outputLines");
  Collections.sort(outputRecords);
  Assert.assertEquals(expectedCounts, outputRecords);
}
Aggregations