Use of co.cask.cdap.api.dataset.lib.TimePartitionedFileSet in project cdap by caskdata.
From the class PartitionCorrectorTestRun, the method testPartitionCorrector:
// Imports this snippet relies on (the enclosing test class is not shown):
import co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties;
import co.cask.cdap.api.dataset.lib.TimePartitionedFileSet;
import co.cask.cdap.proto.ProgramRunStatus;
import co.cask.cdap.test.ApplicationManager;
import co.cask.cdap.test.DataSetManager;
import co.cask.cdap.test.WorkerManager;
import com.google.common.collect.ImmutableMap;
import org.junit.Test;

import java.util.Date;
import java.util.concurrent.TimeUnit;

@Test
public void testPartitionCorrector() throws Exception {
  ApplicationManager appManager = deployApplication(PartitionExploreCorrectorTestApp.class);
  final int numPartitions = 10;
  // Create an explorable TimePartitionedFileSet backed by a CSV-formatted Hive table.
  addDatasetInstance(TimePartitionedFileSet.class.getName(), "tpfs",
                     PartitionedFileSetProperties.builder()
                       .setExploreFormat("csv")
                       .setExploreSchema("key int, value string")
                       .setEnableExploreOnCreate(true)
                       .build());
  DataSetManager<TimePartitionedFileSet> tpfsManager = getDataset("tpfs");
  // DATE_FORMAT is a date format defined elsewhere on the test class (not shown).
  Date date = DATE_FORMAT.parse("6/4/12 10:00 am");
  long baseTime = date.getTime();
  for (int i = 0; i < numPartitions; i++) {
    createPartition(tpfsManager, baseTime + TimeUnit.MINUTES.toMillis(1) * i, i);
  }
  validateAllPartitions(numPartitions);
  dropAllPartitions();
  validateAllPartitions(0);
  // All partitions are missing: drop/recreate the Hive table and re-add all partitions.
  WorkerManager workerManager = appManager.getWorkerManager("PartitionWorker")
    .start(ImmutableMap.of("dataset.name", "tpfs",
                           "batch.size", "5",
                           "verbose", "true"));
  workerManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
  validateAllPartitions(numPartitions);
  dropAllPartitions();
  for (int i = numPartitions; i < 2 * numPartitions; i++) {
    createPartition(tpfsManager, baseTime + TimeUnit.MINUTES.toMillis(1) * i, i);
  }
  validateAllPartitions(numPartitions);
  // Some partitions are missing, some are present: keep the Hive table and try to add all partitions.
  workerManager = appManager.getWorkerManager("PartitionWorker")
    .start(ImmutableMap.of("dataset.name", "tpfs",
                           "batch.size", "8",
                           "verbose", "false",
                           "disable.explore", "false"));
  // Wait for two completed runs in total: the run started above plus the one that finished earlier.
  workerManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 60, TimeUnit.SECONDS);
  validateAllPartitions(2 * numPartitions);
}
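
The test calls three helpers that are defined elsewhere in the test class. Below is a minimal sketch of what createPartition might look like, assuming the standard TimePartitionedFileSet APIs (getPartitionOutput, PartitionOutput.addPartition); the row format is an assumption chosen to match the explore schema "key int, value string". The validateAllPartitions and dropAllPartitions helpers, not sketched here, would check and clear the partitions through CDAP's Explore (Hive) integration.

// Hypothetical sketch, not the actual cdap test code.
// Additionally requires: co.cask.cdap.api.dataset.lib.TimePartitionOutput, java.io.PrintWriter.
private void createPartition(DataSetManager<TimePartitionedFileSet> tpfsManager,
                             long time, int i) throws Exception {
  TimePartitionedFileSet tpfs = tpfsManager.get();
  // Obtain an output location for the given partition time and write one CSV row
  // matching the explore schema "key int, value string".
  TimePartitionOutput output = tpfs.getPartitionOutput(time);
  try (PrintWriter writer = new PrintWriter(output.getLocation().append("file").getOutputStream())) {
    writer.println(i + ",value" + i);
  }
  // Register the partition in the dataset's metadata, then flush the manager
  // so the change becomes visible to subsequent operations.
  output.addPartition();
  tpfsManager.flush();
}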
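For context, the PartitionWorker started above receives its configuration through runtime arguments. The actual worker in PartitionExploreCorrectorTestApp is not shown; the following is only a sketch of the argument plumbing, assuming CDAP's AbstractWorker API, with the correction logic elided.

// Hypothetical sketch of the argument handling; the real worker's logic is elided.
// Requires: co.cask.cdap.api.worker.AbstractWorker, java.util.Map.
public static class PartitionWorker extends AbstractWorker {
  @Override
  public void run() {
    // Runtime arguments are the map passed to WorkerManager.start(...) in the test.
    Map<String, String> args = getContext().getRuntimeArguments();
    String datasetName = args.get("dataset.name"); // "tpfs" in this test
    int batchSize = Integer.parseInt(args.get("batch.size"));
    boolean verbose = Boolean.parseBoolean(args.get("verbose"));
    // The worker would then drop/recreate the Hive table as needed and
    // re-register the dataset's partitions with Explore in batches of batchSize.
  }
}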