Example 61 with PartitionedFileSet

Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

From the class DynamicPartitioningTestRun, the method testDynamicPartitioningMRWithFailure:

private void testDynamicPartitioningMRWithFailure(ApplicationManager appManager, String dsWithExistingPartition, String... outputs) throws Exception {
    // set up the output datasets
    String outputArg = "";
    for (String dataset : outputs) {
        outputArg += dataset + " ";
        try {
            deleteDatasetInstance(testSpace.dataset(dataset));
        } catch (InstanceNotFoundException e) {
        // may be expected. I wish the test framework had truncate()
        }
        addDatasetInstance(PartitionedFileSet.class.getName(), testSpace.dataset(dataset),
            PartitionedFileSetProperties.builder()
                .setPartitioning(PARTITIONING)
                .setEnableExploreOnCreate(true)
                .setOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class)
                // SEPERATOR is the constant's actual (misspelled) name in Hadoop's TextOutputFormat
                .setOutputProperty(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.SEPERATOR, ",")
                .setExploreFormat("csv")
                .setExploreSchema("key string, value string")
                .build());
    }
    outputArg = outputArg.trim();
    // create partition (x="1") in one of the outputs
    DataSetManager<PartitionedFileSet> pfs = getDataset(testSpace.dataset(dsWithExistingPartition));
    Location loc = pfs.get().getEmbeddedFileSet().getLocation("some/path");
    OutputStream os = loc.append("part1").getOutputStream();
    try (Writer writer = new OutputStreamWriter(os)) {
        writer.write("1,x\n");
    }
    pfs.get().addPartition(PartitionKey.builder().addStringField("x", "1").build(), "some/path");
    pfs.flush();
    validatePartitions(dsWithExistingPartition, true);
    Map<String, String> arguments = ImmutableMap.of("outputs", outputArg);
    final MapReduceManager mrManager = appManager.getMapReduceManager("DynamicPartitioningMR");
    final Set<RunRecord> oldRunRecords = new HashSet<>(mrManager.getHistory());
    mrManager.start(arguments);
    // Wait for the new run record to appear and the run to finish.
    final AtomicReference<RunRecord> lastRunRecord = new AtomicReference<>();
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            Set<RunRecord> runRecords = Sets.difference(new HashSet<>(mrManager.getHistory()), oldRunRecords);
            if (runRecords.isEmpty()) {
                return false;
            }
            // Get the new run record (old run records were excluded above)
            RunRecord runRecord = Iterables.getFirst(runRecords, null);
            if (runRecord != null && runRecord.getStatus() != ProgramRunStatus.STARTING && runRecord.getStatus() != ProgramRunStatus.RUNNING) {
                lastRunRecord.set(runRecord);
            }
            return lastRunRecord.get() != null;
        }
    }, 5, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    for (String dataset : outputs) {
        validatePartitions(dataset, dataset.equals(dsWithExistingPartition));
        validateFiles(dataset, dataset.equals(dsWithExistingPartition) ? loc : null);
    }
}
Also used : HashSet(java.util.HashSet) ResultSet(java.sql.ResultSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Set(java.util.Set) MapReduceManager(co.cask.cdap.test.MapReduceManager) InstanceNotFoundException(co.cask.cdap.api.dataset.InstanceNotFoundException) OutputStream(java.io.OutputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) RunRecord(co.cask.cdap.proto.RunRecord) OutputStreamWriter(java.io.OutputStreamWriter) Writer(java.io.Writer) Location(org.apache.twill.filesystem.Location)
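
The validatePartitions and validateFiles helpers are defined elsewhere in DynamicPartitioningTestRun and are not shown on this page. As a rough illustration only, a partition check could look like the following sketch, which probes for the pre-created (x="1") partition; the actual helpers may differ:

private void validatePartitions(String dataset, boolean expectPartition) throws Exception {
    // look up the dataset and probe for the single partition key this test pre-creates
    DataSetManager<PartitionedFileSet> pfs = getDataset(testSpace.dataset(dataset));
    PartitionDetail partition = pfs.get().getPartition(PartitionKey.builder().addStringField("x", "1").build());
    // a failed dynamic-partitioning MapReduce must roll back every partition it created,
    // so only the dataset that already had a partition should report one here
    Assert.assertEquals(expectPartition, partition != null);
}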

Example 62 with PartitionedFileSet

Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

From the class PartitionConsumingTestRun, the method testWordCountOnFileSet:

private void testWordCountOnFileSet(Function<ApplicationManager, ProgramManager> runProgram, boolean produceOutputPartitionEachRun) throws Exception {
    ApplicationManager applicationManager = deployApplication(AppWithPartitionConsumers.class);
    ServiceManager serviceManager = applicationManager.getServiceManager("DatasetService").start();
    serviceManager.waitForStatus(true);
    URL serviceURL = serviceManager.getServiceURL();
    // write a file to the file set using the service and run the WordCount MapReduce job on that one partition
    createPartition(serviceURL, LINE1, "1");
    ProgramManager programManager = runProgram.apply(applicationManager);
    programManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    Assert.assertEquals(new Long(2), getCount(serviceURL, "a"));
    Assert.assertEquals(new Long(1), getCount(serviceURL, "b"));
    Assert.assertEquals(new Long(0), getCount(serviceURL, "c"));
    // create two additional partitions
    createPartition(serviceURL, LINE2, "2");
    createPartition(serviceURL, LINE3, "3");
    // running the program now processes these two new partitions (LINE2 and LINE3) and updates the counts
    // dataset accordingly
    programManager = runProgram.apply(applicationManager);
    programManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
    Assert.assertEquals(new Long(3), getCount(serviceURL, "a"));
    Assert.assertEquals(new Long(3), getCount(serviceURL, "b"));
    Assert.assertEquals(new Long(3), getCount(serviceURL, "c"));
    // running the program without adding new partitions does not affect the counts dataset
    programManager = runProgram.apply(applicationManager);
    programManager.waitForRuns(ProgramRunStatus.COMPLETED, 3, 5, TimeUnit.MINUTES);
    Assert.assertEquals(new Long(3), getCount(serviceURL, "a"));
    Assert.assertEquals(new Long(3), getCount(serviceURL, "b"));
    Assert.assertEquals(new Long(3), getCount(serviceURL, "c"));
    DataSetManager<PartitionedFileSet> outputLines = getDataset("outputLines");
    Set<PartitionDetail> partitions = outputLines.get().getPartitions(PartitionFilter.ALWAYS_MATCH);
    // each of the three MapReduce runs produces an output partition (even if there's no input data)
    // however, Worker run doesn't produce a new output partition if there's no new input partition
    Assert.assertEquals(produceOutputPartitionEachRun ? 3 : 2, partitions.size());
    // we only store the counts to the "outputLines" dataset
    List<String> expectedCounts = Lists.newArrayList("1", "1", "2", "2", "3");
    List<String> outputRecords = getDataFromExplore("outputLines");
    Collections.sort(outputRecords);
    Assert.assertEquals(expectedCounts, outputRecords);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) ServiceManager(co.cask.cdap.test.ServiceManager) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) URL(java.net.URL) ProgramManager(co.cask.cdap.test.ProgramManager)
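
Here createPartition writes through the app's DatasetService HTTP endpoint. Writing a partition directly against the PartitionedFileSet API goes through getPartitionOutput: write files under the returned location, then commit the metadata with addPartition. A minimal sketch, assuming a partitioning with a single string field named "partition" (that field name is an assumption, not taken from the app):

private void createPartitionDirectly(PartitionedFileSet lines, String content, String value) throws IOException {
    PartitionKey key = PartitionKey.builder().addStringField("partition", value).build();
    PartitionOutput output = lines.getPartitionOutput(key);
    // write the partition's data file under the location the dataset chose for this key
    try (Writer writer = new OutputStreamWriter(output.getLocation().append("file").getOutputStream())) {
        writer.write(content);
    }
    // register the partition's metadata; only now does it become visible to readers
    output.addPartition();
}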

Example 63 with PartitionedFileSet

Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

From the class PartitionBatchInput, the method setInput:

/**
 * Used from the initialize method of the implementing batch job to configure a set of {@link Partition}s
 * of a {@link PartitionedFileSet} as input for the run of the batch job.
 * It does this by reading back the previous state, determining the new partitions to read, computing the new
 * state, and persisting this new state. It then configures this dataset as input to the MapReduce context that is
 * passed in.
 *
 * @param mapreduceContext MapReduce context used to access the PartitionedFileSet, and on which the input is
 *                         configured
 * @param partitionedFileSetName the name of the {@link PartitionedFileSet} to consume partitions from
 * @param statePersistor a {@link DatasetStatePersistor} responsible for defining how the partition consumer state is
 *                       managed
 * @param consumerConfiguration defines parameters for the partition consumption
 * @return a BatchPartitionCommitter used to persist the state of the partition consumer
 */
public static BatchPartitionCommitter setInput(MapReduceContext mapreduceContext, String partitionedFileSetName, DatasetStatePersistor statePersistor, ConsumerConfiguration consumerConfiguration) {
    PartitionedFileSet partitionedFileSet = mapreduceContext.getDataset(partitionedFileSetName);
    final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(partitionedFileSet, new DelegatingStatePersistor(mapreduceContext, statePersistor), consumerConfiguration);
    final List<PartitionDetail> consumedPartitions = partitionConsumer.consumePartitions().getPartitions();
    Map<String, String> arguments = new HashMap<>();
    PartitionedFileSetArguments.addInputPartitions(arguments, consumedPartitions);
    mapreduceContext.addInput(Input.ofDataset(partitionedFileSetName, arguments));
    return succeeded -> partitionConsumer.onFinish(consumedPartitions, succeeded);
}
Also used : Input(co.cask.cdap.api.data.batch.Input) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) DatasetStatePersistor(co.cask.cdap.api.dataset.lib.DatasetStatePersistor) MapReduceContext(co.cask.cdap.api.mapreduce.MapReduceContext) List(java.util.List) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Beta(co.cask.cdap.api.annotation.Beta) Map(java.util.Map) HashMap(java.util.HashMap) PartitionedFileSetArguments(co.cask.cdap.api.dataset.lib.PartitionedFileSetArguments) Partition(co.cask.cdap.api.dataset.lib.Partition)
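
Typical usage pairs the returned BatchPartitionCommitter with the program's destroy method: setInput is called from initialize, and onFinish reports whether the run succeeded, so consumed partitions are either marked processed or returned for retry. A sketch, assuming an input PartitionedFileSet named "lines" and consumer state kept in a KeyValueTable via KVTableStatePersistor (the dataset and row names are placeholders):

import co.cask.cdap.api.ProgramStatus;
import co.cask.cdap.api.data.batch.Output;
import co.cask.cdap.api.dataset.lib.DatasetStatePersistor;
import co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration;
import co.cask.cdap.api.dataset.lib.partitioned.KVTableStatePersistor;
import co.cask.cdap.api.dataset.lib.partitioned.PartitionBatchInput;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;

public class PartitionConsumingMapReduce extends AbstractMapReduce {

    private PartitionBatchInput.BatchPartitionCommitter partitionCommitter;

    @Override
    public void initialize() throws Exception {
        // consume whichever partitions of "lines" have not been processed yet
        DatasetStatePersistor statePersistor = new KVTableStatePersistor("consumingState", "state.key");
        partitionCommitter =
            PartitionBatchInput.setInput(getContext(), "lines", statePersistor, ConsumerConfiguration.DEFAULT);
        getContext().addOutput(Output.ofDataset("counts"));
    }

    @Override
    public void destroy() {
        // persist or roll back the consumer state, depending on the run's outcome
        boolean succeeded = getContext().getState().getStatus() == ProgramStatus.COMPLETED;
        partitionCommitter.onFinish(succeeded);
    }
}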

Example 64 with PartitionedFileSet

Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

From the class MapReduceWithPartitionedTest, the method testPartitionedFileSetWithMR:

private void testPartitionedFileSetWithMR(boolean useCombineFileInputFormat) throws Exception {
    ApplicationWithPrograms app = deployApp(AppWithPartitionedFileSet.class, new AppWithPartitionedFileSet.AppConfig(useCombineFileInputFormat));
    // write a value to the input table
    final Table table = datasetCache.getDataset(AppWithPartitionedFileSet.INPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.put(Bytes.toBytes("x"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
        }
    });
    // a partition key for the map/reduce output
    final PartitionKey keyX = PartitionKey.builder().addStringField("type", "x").addLongField("time", 150000L).build();
    // run the partition writer m/r with this output partition time
    Map<String, String> runtimeArguments = Maps.newHashMap();
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the partitioned file set
    final PartitionedFileSet dataset = datasetCache.getDataset(PARTITIONED);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyX);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertTrue(path.contains("x"));
            Assert.assertTrue(path.contains("150000"));
        }
    });
    // delete the data in the input table and write a new row
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            table.delete(Bytes.toBytes("x"));
            table.put(Bytes.toBytes("y"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
        }
    });
    // a new partition key for the next map/reduce
    final PartitionKey keyY = PartitionKey.builder().addStringField("type", "y").addLongField("time", 200000L).build();
    // now run the m/r again with a new, later partition time
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
    // this should have created a partition in the partitioned file set
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Partition partition = dataset.getPartition(keyY);
            Assert.assertNotNull(partition);
            String path = partition.getRelativePath();
            Assert.assertNotNull(path);
            Assert.assertTrue(path.contains("y"));
            Assert.assertTrue(path.contains("200000"));
        }
    });
    // a partition filter that matches the outputs of both map/reduces
    PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
    // now run a map/reduce that reads all the partitions
    runtimeArguments = Maps.newHashMap();
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read both partitions - and written both x and y to row a
    final Table output = datasetCache.getDataset(AppWithPartitionedFileSet.OUTPUT);
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("a"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertEquals("2", row.getString("y"));
            Assert.assertEquals("{type=y, time=200000}", row.getString("y_key"));
        }
    });
    // a partition filter that matches the output key of the first map/reduce
    PartitionFilter filterX = PartitionFilter.builder().addValueCondition("type", "x").addRangeCondition("time", null, 160000L).build();
    // now run a map/reduce that reads a range of the partitions, namely the first one
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read the first partition only - and written only x to row b
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("b"));
            Assert.assertEquals("1", row.getString("x"));
            Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
            Assert.assertNull(row.get("y"));
            Assert.assertNull(row.get("y_key"));
        }
    });
    // a partition filter that matches no key
    PartitionFilter filterMT = PartitionFilter.builder().addValueCondition("type", "nosuchthing").build();
    // now run a map/reduce that reads an empty range of partitions (the filter matches nothing)
    inputArgs.clear();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT);
    runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
    runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");
    Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
    // this should have read no partitions - and written nothing to row n
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Row row = output.get(Bytes.toBytes("n"));
            Assert.assertTrue(row.isEmpty());
        }
    });
}
Also used : Partition(co.cask.cdap.api.dataset.lib.Partition) Table(co.cask.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionFilter(co.cask.cdap.api.dataset.lib.PartitionFilter) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) Row(co.cask.cdap.api.dataset.table.Row)
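
Note the range semantics the filters above rely on: addRangeCondition treats the lower bound as inclusive and the upper bound as exclusive, which is why the "x" to "z" filter matches both partitions. A filter can also be evaluated against a key in memory; this sketch assumes PartitionFilter exposes a match(PartitionKey) method:

// the filter from the test: matches "x" and "y", but not "z"
PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
PartitionKey keyX = PartitionKey.builder().addStringField("type", "x").addLongField("time", 150000L).build();
PartitionKey keyZ = PartitionKey.builder().addStringField("type", "z").addLongField("time", 200000L).build();
Assert.assertTrue(filterXY.match(keyX));  // lower bound "x" is inclusive
Assert.assertFalse(filterXY.match(keyZ)); // upper bound "z" is exclusive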

Example 65 with PartitionedFileSet

Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

From the class SmartWorkflow, the method destroy:

@Override
public void destroy() {
    WorkflowContext workflowContext = getContext();
    PipelineRuntime pipelineRuntime = new PipelineRuntime(workflowContext, workflowMetrics);
    // Execute the post actions only if pipeline is not running in preview mode.
    if (!workflowContext.getDataTracer(PostAction.PLUGIN_TYPE).isEnabled()) {
        for (Map.Entry<String, PostAction> endingActionEntry : postActions.entrySet()) {
            String name = endingActionEntry.getKey();
            PostAction action = endingActionEntry.getValue();
            StageSpec stageSpec = stageSpecs.get(name);
            BatchActionContext context = new WorkflowBackedActionContext(workflowContext, pipelineRuntime, stageSpec);
            try {
                action.run(context);
            } catch (Throwable t) {
                LOG.error("Error while running post action {}.", name, t);
            }
        }
    }
    // publish all alerts
    for (Map.Entry<String, AlertPublisher> alertPublisherEntry : alertPublishers.entrySet()) {
        String name = alertPublisherEntry.getKey();
        AlertPublisher alertPublisher = alertPublisherEntry.getValue();
        PartitionedFileSet alertConnector = workflowContext.getDataset(name);
        try (CloseableIterator<Alert> alerts = new AlertReader(alertConnector.getPartitions(PartitionFilter.ALWAYS_MATCH))) {
            if (!alerts.hasNext()) {
                continue;
            }
            StageMetrics stageMetrics = new DefaultStageMetrics(workflowMetrics, name);
            StageSpec stageSpec = stageSpecs.get(name);
            AlertPublisherContext alertContext = new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, workflowContext, workflowContext.getAdmin());
            alertPublisher.initialize(alertContext);
            TrackedIterator<Alert> trackedIterator = new TrackedIterator<>(alerts, stageMetrics, Constants.Metrics.RECORDS_IN);
            alertPublisher.publish(trackedIterator);
        } catch (Exception e) {
            LOG.warn("Stage {} had errors publishing alerts. Alerts may not have been published.", name, e);
        } finally {
            try {
                alertPublisher.destroy();
            } catch (Exception e) {
                LOG.warn("Error destroying alert publisher for stage {}", name, e);
            }
        }
    }
    ProgramStatus status = getContext().getState().getStatus();
    if (status == ProgramStatus.FAILED) {
        WRAPPERLOGGER.error("Pipeline '{}' failed.", getContext().getApplicationSpecification().getName());
    } else {
        WRAPPERLOGGER.info("Pipeline '{}' {}.", getContext().getApplicationSpecification().getName(), status == ProgramStatus.COMPLETED ? "succeeded" : status.name().toLowerCase());
    }
    MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), workflowContext.getLogicalStartTime(), workflowContext, workflowContext.getNamespace());
    // Get resolved plugin properties
    Map<String, Map<String, String>> resolvedProperties = new HashMap<>();
    for (StageSpec spec : stageSpecs.values()) {
        String stageName = spec.getName();
        resolvedProperties.put(stageName, workflowContext.getPluginProperties(stageName, macroEvaluator).getProperties());
    }
    // Add resolved plugin properties to workflow token as a JSON String
    workflowContext.getToken().put(RESOLVED_PLUGIN_PROPERTIES_MAP, GSON.toJson(resolvedProperties));
}
Also used : PipelineRuntime(co.cask.cdap.etl.common.PipelineRuntime) MacroEvaluator(co.cask.cdap.api.macro.MacroEvaluator) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) BatchActionContext(co.cask.cdap.etl.api.batch.BatchActionContext) WorkflowBackedActionContext(co.cask.cdap.etl.batch.WorkflowBackedActionContext) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) AlertReader(co.cask.cdap.etl.batch.connector.AlertReader) StageSpec(co.cask.cdap.etl.spec.StageSpec) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) AlertPublisherContext(co.cask.cdap.etl.api.AlertPublisherContext) DefaultAlertPublisherContext(co.cask.cdap.etl.common.DefaultAlertPublisherContext) AlertPublisher(co.cask.cdap.etl.api.AlertPublisher) TrackedIterator(co.cask.cdap.etl.common.TrackedIterator) WorkflowContext(co.cask.cdap.api.workflow.WorkflowContext) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) DisjointConnectionsException(co.cask.cdap.etl.planner.DisjointConnectionsException) Alert(co.cask.cdap.etl.api.Alert) PostAction(co.cask.cdap.etl.api.batch.PostAction) Map(java.util.Map) ProgramStatus(co.cask.cdap.api.ProgramStatus)
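
The loop above drains each stage's alert connector and hands the alerts to that stage's AlertPublisher plugin. A publisher only has to implement publish; the following is a minimal logging publisher, sketched against the cdap-etl-api classes imported above (it assumes Alert exposes its stage name and payload via getters, and that the other lifecycle methods can be no-ops):

import java.util.Iterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import co.cask.cdap.etl.api.Alert;
import co.cask.cdap.etl.api.AlertPublisher;
import co.cask.cdap.etl.api.AlertPublisherContext;
import co.cask.cdap.etl.api.PipelineConfigurer;

public class LoggingAlertPublisher extends AlertPublisher {

    private static final Logger LOG = LoggerFactory.getLogger(LoggingAlertPublisher.class);

    @Override
    public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
        // nothing to validate or create at deployment time
    }

    @Override
    public void initialize(AlertPublisherContext context) throws Exception {
        // no connections to open for this sketch
    }

    @Override
    public void publish(Iterator<Alert> alerts) {
        // drain the iterator; SmartWorkflow.destroy passes a TrackedIterator, so each
        // next() call is counted against the stage's RECORDS_IN metric
        while (alerts.hasNext()) {
            Alert alert = alerts.next();
            LOG.info("Alert from stage {} with payload {}", alert.getStageName(), alert.getPayload());
        }
    }

    @Override
    public void destroy() {
        // nothing to clean up
    }
}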

Aggregations

PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)65 Test (org.junit.Test)39 PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)32 Location (org.apache.twill.filesystem.Location)25 TransactionAware (org.apache.tephra.TransactionAware)24 TransactionExecutor (org.apache.tephra.TransactionExecutor)24 PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail)18 IOException (java.io.IOException)17 DataSetException (co.cask.cdap.api.dataset.DataSetException)12 FileSet (co.cask.cdap.api.dataset.lib.FileSet)12 HashSet (java.util.HashSet)12 List (java.util.List)12 PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException)11 PartitionAlreadyExistsException (co.cask.cdap.api.dataset.lib.PartitionAlreadyExistsException)11 ConcurrentPartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer)11 PartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer)11 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)9 ImmutableList (com.google.common.collect.ImmutableList)9 ArrayList (java.util.ArrayList)9 HashMap (java.util.HashMap)9