Examples with PartitionedFileSet - io.cdap.cdap.api.dataset.lib.PartitionedFileSet

Example 1 with PartitionedFileSet

use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedTextFile.

// this tests mainly the support for different text formats. Other features (partitioning etc.) are tested above.
/**
 * Creates an explorable partitioned file set in the given text format, adds one partition to it,
 * and verifies through Hive queries that the table, the partition and the rows are visible.
 * Finally drops the partition and the dataset and verifies that the Hive table is gone.
 *
 * @param name      dataset name; also used as the base path of the file set
 * @param format    explore format passed to {@code setExploreFormat}
 * @param delim     value for the "delimiter" explore format property; not set when {@code null}
 * @param fileDelim delimiter handed to {@code FileWriterHelper.generateTextFile} when writing the file
 * @throws Exception if any dataset or explore operation fails
 */
private void testPartitionedTextFile(String name, String format, String delim, String fileDelim) throws Exception {
    final DatasetId pfsId = NAMESPACE_ID.dataset(name);
    final String hiveTable = getDatasetHiveName(pfsId);

    // describe a file set partitioned by the single int field "number"
    PartitionedFileSetProperties.Builder props = (PartitionedFileSetProperties.Builder) PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(name)
        .setEnableExploreOnCreate(true)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat(format);
    if (delim != null) {
        props.setExploreFormatProperty("delimiter", delim);
    }
    datasetFramework.addInstance("partitionedFileSet", pfsId, props.build());

    // the Hive table must have been created along with the dataset
    List<ColumnDesc> showTablesColumns = Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer"));
    List<QueryResult> tableRow = Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTable)));
    runCommand(NAMESPACE_ID, "show tables", true, showTablesColumns, tableRow);

    // obtain the dataset instance to perform data operations
    PartitionedFileSet pfs = datasetFramework.getDataset(pfsId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(pfs);
    FileSet embedded = pfs.getEmbeddedFileSet();

    // add one partition. Hive expects a partition to be a directory, so the data file lives inside "file1"
    Location dataFile = embedded.getLocation("file1/nn");
    FileWriterHelper.generateTextFile(dataFile.getOutputStream(), fileDelim, "x", 1, 2);
    PartitionKey numberOne = PartitionKey.builder().addIntField("number", 1).build();
    addPartition(pfs, numberOne, "file1");

    // the partition must be registered in Hive
    List<ColumnDesc> showPartitionsColumns = Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer"));
    List<QueryResult> partitionRow = Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("number=1")));
    runCommand(NAMESPACE_ID, "show partitions " + hiveTable, true, showPartitionsColumns, partitionRow);

    // the key-value written to the file must be queryable, with the partition field as third column
    List<ColumnDesc> selectColumns = Lists.newArrayList(
        new ColumnDesc(hiveTable + ".key", "STRING", 1, null),
        new ColumnDesc(hiveTable + ".value", "INT", 2, null),
        new ColumnDesc(hiveTable + ".number", "INT", 3, null));
    List<QueryResult> selectRows = Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1)));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + hiveTable + " ORDER BY key", true, selectColumns, selectRows);

    // clean up: drop the partition, then the dataset itself
    dropPartition(pfs, numberOne);
    datasetFramework.deleteInstance(pfsId);

    // with the dataset deleted, "show tables" must return nothing
    List<ColumnDesc> finalShowTablesColumns = Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer"));
    runCommand(NAMESPACE_ID, "show tables", false, finalShowTablesColumns, Collections.<QueryResult>emptyList());
}

Also used : QueryResult(io.cdap.cdap.proto.QueryResult) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSetProperties(io.cdap.cdap.api.dataset.lib.PartitionedFileSetProperties) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location)

Example 2 with PartitionedFileSet

use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedExisting.

/**
 * Verifies how a partitioned file set behaves when it is created on top of a Hive table that
 * already exists because another dataset ("dummy") created it.
 *
 * @param reuseProperty name of the dataset property that is set to "true" on the second dataset
 * @param possessed     when {@code true}, the test expects the second dataset's schema update to
 *                      show up in the table and the table to be gone after that dataset is deleted;
 *                      when {@code false}, it expects the original table and data to stay untouched
 * @throws Exception if any dataset or explore operation fails
 */
private void testPartitionedExisting(String reuseProperty, boolean possessed) throws Exception {
    final String tableName = "reuse";
    final String selectAll = "SELECT * FROM " + tableName;
    final DatasetId dummyInstanceId = NAMESPACE_ID.dataset("dummy");
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("tpExisting");
    final String basePath = new File(tmpFolder.newFolder(), "base").toString();

    // step 1: a first PFS creates the Hive table and contributes one partition with one row
    DatasetProperties ownerProps = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(basePath)
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), dummyInstanceId, ownerProps);
    PartitionedFileSet owner = datasetFramework.getDataset(dummyInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(owner);
    Location dataFile = owner.getEmbeddedFileSet().getLocation("number1").append("file1");
    PartitionKey key = PartitionKey.builder().addIntField("number", 1).build();
    FileWriterHelper.generateTextFile(dataFile.getOutputStream(), ",", "x", 1, 2);
    addPartition(owner, key, "number1");

    // the row must be visible through the table
    List<ColumnDesc> expectedColumns = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".value", "INT", 2, null),
        new ColumnDesc(tableName + ".number", "INT", 3, null));
    runCommand(NAMESPACE_ID, selectAll, true, expectedColumns,
               Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));

    // step 2: a second PFS over the same base path and table name, flagged with the reuse property
    DatasetProperties reuseProps = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(basePath)
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), datasetInstanceId, reuseProps);
    PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);

    // step 3: update the second PFS with renamed schema columns (k, v instead of key, value)
    DatasetProperties renamedProps = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(basePath)
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("k STRING, v INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.updateInstance(datasetInstanceId, renamedProps);

    if (possessed) {
        // the table now carries the renamed columns; the result rows are not checked here
        List<ColumnDesc> renamedColumns = Lists.newArrayList(
            new ColumnDesc(tableName + ".k", "STRING", 1, null),
            new ColumnDesc(tableName + ".v", "INT", 2, null),
            new ColumnDesc(tableName + ".number", "INT", 3, null));
        runCommand(NAMESPACE_ID, selectAll, true, renamedColumns, null);
    } else {
        // the table is unchanged: original columns and original row
        runCommand(NAMESPACE_ID, selectAll, true, expectedColumns,
                   Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    }

    // step 4: delete the second PFS and check what is left of the table
    datasetFramework.deleteInstance(datasetInstanceId);
    if (possessed) {
        // no tables are expected to remain
        runCommand(NAMESPACE_ID, "SHOW tables", false, null, Collections.<QueryResult>emptyList());
    } else {
        // the table and its data are expected to survive
        runCommand(NAMESPACE_ID, selectAll, true, expectedColumns,
                   Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    }
    datasetFramework.deleteInstance(dummyInstanceId);
}

Also used : QueryResult(io.cdap.cdap.proto.QueryResult) DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) File(java.io.File) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location)

Example 3 with PartitionedFileSet

use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedFileSet.

/**
 * End-to-end test of an explorable, Avro-backed partitioned file set: creates the dataset, adds
 * and drops partitions (including inside failing transactions), drops a partition directly in
 * Hive, updates the table schema, toggles explore off and on, and finally deletes the dataset.
 *
 * @param dbName    when non-null, it is set as the explore database name, the database is created
 *                  up front, and the "show tables" command and the queried table name are
 *                  qualified with it
 * @param tableName when non-null, it is set as the explore table name and used as the expected
 *                  Hive table name; otherwise the name from {@code getDatasetHiveName} is used
 * @throws Exception if any dataset or explore operation fails
 */
private void testPartitionedFileSet(@Nullable String dbName, @Nullable String tableName) throws Exception {
    DatasetId datasetInstanceId = NAMESPACE_ID.dataset("parted");
    String hiveTableName = getDatasetHiveName(datasetInstanceId);
    String showTablesCommand = "show tables";
    FileSetProperties.Builder props = PartitionedFileSetProperties.builder().setPartitioning(Partitioning.builder().addStringField("str").addIntField("num").build()).setBasePath("parted").setEnableExploreOnCreate(true).setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe").setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat").setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat").setTableProperty("avro.schema.literal", SCHEMA.toString());
    if (tableName != null) {
        props.setExploreTableName(tableName);
        hiveTableName = tableName;
    }
    // queryTableName is what SQL statements use; hiveTableName (unqualified) is what result columns are prefixed with
    String queryTableName = hiveTableName;
    if (dbName != null) {
        props.setExploreDatabaseName(dbName);
        runCommand(NAMESPACE_ID, "create database " + dbName, false, null, null);
        showTablesCommand += " in " + dbName;
        queryTableName = dbName + "." + queryTableName;
    }
    // create a partitioned file set, partitioned by the string field "str" and the int field "num"
    datasetFramework.addInstance("partitionedFileSet", datasetInstanceId, props.build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
    // Accessing dataset instance to perform data operations
    final PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    FileSet fileSet = partitioned.getEmbeddedFileSet();
    // add some partitions. Beware that Hive expects a partition to be a directory, so we create dirs with one file
    Location locationX1 = fileSet.getLocation("fileX1/nn");
    Location locationY1 = fileSet.getLocation("fileY1/nn");
    Location locationX2 = fileSet.getLocation("fileX2/nn");
    Location locationY2 = fileSet.getLocation("fileY2/nn");
    FileWriterHelper.generateAvroFile(locationX1.getOutputStream(), "x", 1, 2);
    FileWriterHelper.generateAvroFile(locationY1.getOutputStream(), "y", 1, 2);
    FileWriterHelper.generateAvroFile(locationX2.getOutputStream(), "x", 2, 3);
    FileWriterHelper.generateAvroFile(locationY2.getOutputStream(), "y", 2, 3);
    // keyX1 and keyX2 are final because the anonymous Runnables below capture them
    final PartitionKey keyX1 = PartitionKey.builder().addStringField("str", "x").addIntField("num", 1).build();
    PartitionKey keyY1 = PartitionKey.builder().addStringField("str", "y").addIntField("num", 1).build();
    final PartitionKey keyX2 = PartitionKey.builder().addStringField("str", "x").addIntField("num", 2).build();
    PartitionKey keyY2 = PartitionKey.builder().addStringField("str", "y").addIntField("num", 2).build();
    addPartition(partitioned, keyX1, "fileX1");
    addPartition(partitioned, keyY1, "fileY1");
    addPartition(partitioned, keyX2, "fileX2");
    addPartition(partitioned, keyY2, "fileY2");
    // verify that the partitions were added to Hive
    validatePartitions(queryTableName, partitioned, ImmutableList.of(keyX1, keyX2, keyY1, keyY2));
    // verify that count() and where... work in Hive
    runCommand(NAMESPACE_ID, "SELECT count(*) AS count FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(4L))));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName + " WHERE num = 2 ORDER BY key, value", true, Lists.newArrayList(new ColumnDesc(hiveTableName + ".key", "STRING", 1, null), new ColumnDesc(hiveTableName + ".value", "STRING", 2, null), new ColumnDesc(hiveTableName + ".str", "STRING", 3, null), new ColumnDesc(hiveTableName + ".num", "INT", 4, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x2", "#2", "x", 2)), new QueryResult(Lists.<Object>newArrayList("y2", "#2", "y", 2))));
    // drop a partition and validate the remaining partitions
    dropPartition(partitioned, keyX2);
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // attempt a transaction that drops one partition, adds another, and then fails
    // NOTE(review): if doTransaction returns normally, nothing in this try block fails the test;
    // only the validatePartitions call after it would notice that the changes were not undone
    try {
        doTransaction(partitioned, new Runnable() {

            @Override
            public void run() {
                partitioned.dropPartition(keyX1);
                partitioned.addPartition(keyX2, "fileX2");
                Assert.fail("fail tx");
            }
        });
    } catch (TransactionFailureException e) {
        // expected: the failure raised inside the Runnable aborts the transaction
    }
    // validate that both the drop and addPartition were undone
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // attempt a transaction that attempts to add an existing partition, hence fails
    // NOTE(review): as above, a doTransaction that returns normally is not treated as a failure here
    try {
        doTransaction(partitioned, new Runnable() {

            @Override
            public void run() {
                partitioned.addPartition(keyX1, "fileX1");
                throw new RuntimeException("on purpose");
            }
        });
    } catch (TransactionFailureException e) {
        // the cause must be a DataSetException, not the "on purpose" RuntimeException;
        // presumably addPartition rejects the already existing keyX1 before the throw is reached
        Assert.assertTrue(e.getCause() instanceof DataSetException);
    }
    // validate that the failed addPartition left the set of partitions unchanged
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // drop a partition directly from hive
    runCommand(NAMESPACE_ID, "ALTER TABLE " + queryTableName + " DROP PARTITION (str='y', num=2)", false, null, null);
    // verify that one more value is gone now, namely y2, in Hive, but the PFS still has it
    validatePartitionsInHive(queryTableName, ImmutableSet.of(keyX1, keyY1));
    validatePartitionsInPFS(partitioned, ImmutableSet.of(keyX1, keyY1, keyY2));
    // make sure the partition can still be dropped from the PFS dataset
    dropPartition(partitioned, keyY2);
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1));
    // change the explore schema by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setTableProperty("avro.schema.literal", K_SCHEMA.toString()).build());
    // validate the schema was updated
    // NOTE(review): the trailing 'true' presumably tells validatePartitions to expect the K_SCHEMA columns; confirm against the helper
    validatePartitions(queryTableName, partitioned, ImmutableSet.of(keyX1, keyY1), true);
    // disable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(false).build());
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());
    // re-enable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(true).build());
    // verify the Hive table is back
    runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
    // drop the dataset
    datasetFramework.deleteInstance(datasetInstanceId);
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, "show tables", false, null, Collections.<QueryResult>emptyList());
}

Also used : PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) DatasetId(io.cdap.cdap.proto.id.DatasetId) PartitionedFileSetProperties(io.cdap.cdap.api.dataset.lib.PartitionedFileSetProperties) FileSetProperties(io.cdap.cdap.api.dataset.lib.FileSetProperties) QueryResult(io.cdap.cdap.proto.QueryResult) TransactionFailureException(org.apache.tephra.TransactionFailureException) DataSetException(io.cdap.cdap.api.dataset.DataSetException) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Location(org.apache.twill.filesystem.Location)

Example 4 with PartitionedFileSet

use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedAvroSchemaUpdate.

/**
 * Verifies that updating the "avro.schema.literal" table property of an explorable partitioned
 * file set changes the columns returned when querying a partition that was added before the update.
 *
 * @throws Exception if any dataset or explore operation fails
 */
@Test
public void testPartitionedAvroSchemaUpdate() throws Exception {
    final DatasetId datasetId = NAMESPACE_ID.dataset("avroupd");
    final String tableName = getDatasetHiveName(datasetId);
    final String selectPartitionFour = "SELECT * FROM " + tableName + " WHERE number=4";

    // create a partitioned file set (int field "number") exposed to Hive through the Avro SerDe with SCHEMA
    datasetFramework.addInstance(
        PartitionedFileSet.class.getName(),
        datasetId,
        PartitionedFileSetProperties.builder()
            .setPartitioning(Partitioning.builder().addIntField("number").build())
            .setEnableExploreOnCreate(true)
            .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
            .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
            .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
            .setTableProperty("avro.schema.literal", SCHEMA.toString())
            .build());

    // obtain the dataset instance to perform data operations
    PartitionedFileSet partitioned = datasetFramework.getDataset(datasetId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    FileSet fileSet = partitioned.getEmbeddedFileSet();

    // write one Avro file and register its directory as partition number=4
    Location avroFile = fileSet.getLocation("file4/nn");
    FileWriterHelper.generateAvroFile(avroFile.getOutputStream(), "x", 4, 5);
    addPartition(partitioned, PartitionKey.builder().addIntField("number", 4).build(), "file4");

    // with the initial schema the query returns key, value and the partition field
    List<ColumnDesc> expectedColumns = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".value", "STRING", 2, null),
        new ColumnDesc(tableName + ".number", "INT", 3, null));
    // the avro file has key=x4, value=#4
    runCommand(NAMESPACE_ID, selectPartitionFour, true, expectedColumns,
               Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x4", "#4", 4))));

    // update the dataset with identical properties except for the schema literal, now K_SCHEMA
    datasetFramework.updateInstance(
        datasetId,
        PartitionedFileSetProperties.builder()
            .setPartitioning(Partitioning.builder().addIntField("number").build())
            .setEnableExploreOnCreate(true)
            .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
            .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
            .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
            .setTableProperty("avro.schema.literal", K_SCHEMA.toString())
            .build());

    // after the update the value column is no longer part of the result
    expectedColumns = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".number", "INT", 2, null));
    runCommand(NAMESPACE_ID, selectPartitionFour, true, expectedColumns,
               Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x4", 4))));
}

Also used : QueryResult(io.cdap.cdap.proto.QueryResult) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 5 with PartitionedFileSet

use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class ExploreTableManager method generateDisableStatement.

/**
 * Generates the statement that removes the explore table of the given dataset.
 *
 * @param datasetId the dataset whose explore table should be disabled
 * @param spec      the dataset specification; its properties determine the table and database names
 * @return the statement produced by {@code generateDeleteStatement}, or {@code null} when there is
 *         nothing to do: either the table does not exist, or the dataset is a {@code FileSet} or
 *         {@code PartitionedFileSet} whose properties say it reuses an existing table
 * @throws ExploreException if an {@link IOException} occurs while the dataset is instantiated or
 *                          processed; the original exception is attached as the cause
 */
private String generateDisableStatement(DatasetId datasetId, DatasetSpecification spec) throws ExploreException {
    String tableName = tableNaming.getTableName(datasetId, spec.getProperties());
    String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
    // If table does not exist, nothing to be done
    try {
        exploreService.getTableInfo(datasetId.getNamespace(), databaseName, tableName);
    } catch (TableNotFoundException e) {
        // Ignore exception, since this means table was not found.
        return null;
    }
    try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
        Dataset dataset = datasetInstantiator.getDataset(datasetId);
        try {
            // do not drop the explore table if the dataset is reusing an existing table
            boolean isFileSet = dataset instanceof FileSet || dataset instanceof PartitionedFileSet;
            if (isFileSet && FileSetProperties.isUseExisting(spec.getProperties())) {
                return null;
            }
            return generateDeleteStatement(dataset, databaseName, tableName);
        } finally {
            Closeables.closeQuietly(dataset);
        }
    } catch (IOException e) {
        LOG.error("Exception creating dataset classLoaderProvider for dataset {}.", datasetId, e);
        // keep the IOException as the cause so callers can see the root failure
        throw new ExploreException("Exception instantiating dataset " + datasetId, e);
    }
}

Also used : PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) SystemDatasetInstantiator(io.cdap.cdap.data.dataset.SystemDatasetInstantiator) Dataset(io.cdap.cdap.api.dataset.Dataset) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) IOException(java.io.IOException)

Aggregations

PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet)53 Test (org.junit.Test)36 PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey)26 TransactionAware (org.apache.tephra.TransactionAware)22 TransactionExecutor (org.apache.tephra.TransactionExecutor)22 Location (org.apache.twill.filesystem.Location)22 PartitionDetail (io.cdap.cdap.api.dataset.lib.PartitionDetail)14 IOException (java.io.IOException)13 DataSetException (io.cdap.cdap.api.dataset.DataSetException)12 FileSet (io.cdap.cdap.api.dataset.lib.FileSet)12 List (java.util.List)12 PartitionNotFoundException (io.cdap.cdap.api.dataset.PartitionNotFoundException)11 PartitionAlreadyExistsException (io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException)11 ConcurrentPartitionConsumer (io.cdap.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer)11 PartitionConsumer (io.cdap.cdap.api.dataset.lib.partitioned.PartitionConsumer)11 ImmutableList (com.google.common.collect.ImmutableList)9 HashSet (java.util.HashSet)9 TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet)8 ArrayList (java.util.ArrayList)8 TransactionContext (org.apache.tephra.TransactionContext)8