Search in sources:

Example 6 with InstanceNotFoundException

Use of co.cask.cdap.api.dataset.InstanceNotFoundException in the cdap project by caskdata.

The class DynamicPartitioningTestRun defines the method testDynamicPartitioningMRWithFailure.

/**
 * Runs the DynamicPartitioningMR program against the given output datasets and validates the
 * resulting partition/file state after the run completes (this scenario expects a failing run).
 *
 * @param appManager manager for the deployed application containing DynamicPartitioningMR
 * @param dsWithExistingPartition the one output dataset that is pre-seeded with partition (x="1");
 *        after the run, only this dataset is expected to retain its partition and file
 * @param outputs names of all output datasets; each is recreated from a clean slate before the run
 * @throws Exception if dataset setup, program execution, or validation fails
 */
private void testDynamicPartitioningMRWithFailure(ApplicationManager appManager, String dsWithExistingPartition, String... outputs) throws Exception {
    // (Re-)create each output dataset from a clean slate.
    for (String dataset : outputs) {
        try {
            deleteDatasetInstance(testSpace.dataset(dataset));
        } catch (InstanceNotFoundException e) {
            // Expected when the dataset does not exist yet. I wish the test framework had truncate()
        }
        addDatasetInstance(PartitionedFileSet.class.getName(), testSpace.dataset(dataset), PartitionedFileSetProperties.builder().setPartitioning(PARTITIONING).setEnableExploreOnCreate(true).setOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class).setOutputProperty(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.SEPERATOR, ",").setExploreFormat("csv").setExploreSchema("key string, value string").build());
    }
    // Space-separated list of output dataset names, passed to the program as a runtime argument.
    String outputArg = String.join(" ", outputs);
    // create partition (x="1") in one of the outputs
    DataSetManager<PartitionedFileSet> pfs = getDataset(testSpace.dataset(dsWithExistingPartition));
    Location loc = pfs.get().getEmbeddedFileSet().getLocation("some/path");
    OutputStream os = loc.append("part1").getOutputStream();
    // try-with-resources closes the writer, which in turn closes the underlying stream
    try (Writer writer = new OutputStreamWriter(os)) {
        writer.write("1,x\n");
    }
    pfs.get().addPartition(PartitionKey.builder().addStringField("x", "1").build(), "some/path");
    pfs.flush();
    validatePartitions(dsWithExistingPartition, true);
    Map<String, String> arguments = ImmutableMap.of("outputs", outputArg);
    final MapReduceManager mrManager = appManager.getMapReduceManager("DynamicPartitioningMR");
    // Snapshot existing run records so we can detect the run started below.
    final Set<RunRecord> oldRunRecords = new HashSet<>(mrManager.getHistory());
    mrManager.start(arguments);
    // Wait for the new run record to appear and finished running.
    final AtomicReference<RunRecord> lastRunRecord = new AtomicReference<>();
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            // New run records are those not present before start() was called.
            Set<RunRecord> runRecords = Sets.difference(new HashSet<>(mrManager.getHistory()), oldRunRecords);
            if (runRecords.isEmpty()) {
                return false;
            }
            // Pick one of the new run records (exactly one new run is expected here).
            RunRecord runRecord = Iterables.getFirst(runRecords, null);
            // Done waiting once the run has left the STARTING/RUNNING states, i.e. it terminated.
            if (runRecord != null && runRecord.getStatus() != ProgramRunStatus.STARTING && runRecord.getStatus() != ProgramRunStatus.RUNNING) {
                lastRunRecord.set(runRecord);
            }
            return lastRunRecord.get() != null;
        }
    }, 5, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    // Only the pre-seeded dataset should retain its partition and file; the others must be empty.
    for (String dataset : outputs) {
        validatePartitions(dataset, dataset.equals(dsWithExistingPartition));
        validateFiles(dataset, dataset.equals(dsWithExistingPartition) ? loc : null);
    }
}
Also used: HashSet(java.util.HashSet) ResultSet(java.sql.ResultSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Set(java.util.Set) MapReduceManager(co.cask.cdap.test.MapReduceManager) InstanceNotFoundException(co.cask.cdap.api.dataset.InstanceNotFoundException) OutputStream(java.io.OutputStream) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) AtomicReference(java.util.concurrent.atomic.AtomicReference) InstanceNotFoundException(co.cask.cdap.api.dataset.InstanceNotFoundException) IOException(java.io.IOException) RunRecord(co.cask.cdap.proto.RunRecord) OutputStreamWriter(java.io.OutputStreamWriter) OutputStreamWriter(java.io.OutputStreamWriter) Writer(java.io.Writer) Location(org.apache.twill.filesystem.Location) HashSet(java.util.HashSet)

Aggregations

InstanceNotFoundException (co.cask.cdap.api.dataset.InstanceNotFoundException)6 DatasetDefinition (co.cask.cdap.api.dataset.DatasetDefinition)3 DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException)3 DatasetSpecification (co.cask.cdap.api.dataset.DatasetSpecification)3 AbstractDatasetDefinition (co.cask.cdap.api.dataset.lib.AbstractDatasetDefinition)3 HashSet (java.util.HashSet)2 DatasetAdmin (co.cask.cdap.api.dataset.DatasetAdmin)1 IncompatibleUpdateException (co.cask.cdap.api.dataset.IncompatibleUpdateException)1 InstanceConflictException (co.cask.cdap.api.dataset.InstanceConflictException)1 Updatable (co.cask.cdap.api.dataset.Updatable)1 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)1 WorkflowSpecification (co.cask.cdap.api.workflow.WorkflowSpecification)1 DatasetCreationSpec (co.cask.cdap.internal.dataset.DatasetCreationSpec)1 RunRecord (co.cask.cdap.proto.RunRecord)1 DatasetId (co.cask.cdap.proto.id.DatasetId)1 MapReduceManager (co.cask.cdap.test.MapReduceManager)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 OutputStreamWriter (java.io.OutputStreamWriter)1 Writer (java.io.Writer)1