Search in sources :

Example 1 with FileSetModule

use of co.cask.cdap.data2.dataset2.lib.file.FileSetModule in project cdap by caskdata.

the class AbstractDatasetFrameworkTest method testBasicManagement.

/**
 * Walks a dataset framework through module registration, instance creation,
 * instance update and cleanup, asserting the visible state after each step.
 */
@Test
public void testBasicManagement() throws Exception {
    final String tableTypeName = Table.class.getName();
    final String kvTypeName = SimpleKVTable.class.getName();
    final DatasetTypeId tableTypeId = NAMESPACE_ID.datasetType(tableTypeName);

    // --- register modules ---
    final DatasetFramework dsFramework = getFramework();
    dsFramework.addModule(IN_MEMORY, new InMemoryTableModule());
    dsFramework.addModule(CORE, new CoreDatasetsModule());
    dsFramework.addModule(FILE, new FileSetModule());
    dsFramework.addModule(KEY_VALUE, new SingleTypeModule(SimpleKVTable.class));

    // Table is expected to be reported as a system type while SimpleKVTable is not;
    // both are expected to resolve through their type ids.
    Assert.assertTrue(dsFramework.hasSystemType(tableTypeName));
    Assert.assertFalse(dsFramework.hasSystemType(kvTypeName));
    Assert.assertTrue(dsFramework.hasType(tableTypeId));
    Assert.assertTrue(dsFramework.hasType(SIMPLE_KV_TYPE));

    // --- create instances ---
    dsFramework.addInstance(tableTypeName, MY_TABLE, DatasetProperties.EMPTY);
    Assert.assertTrue(dsFramework.hasInstance(MY_TABLE));
    DatasetSpecification tableSpec = dsFramework.getDatasetSpec(MY_TABLE);
    Assert.assertNotNull(tableSpec);
    Assert.assertEquals(MY_TABLE.getEntityName(), tableSpec.getName());
    Assert.assertEquals(tableTypeName, tableSpec.getType());

    dsFramework.addInstance(tableTypeName, MY_TABLE2, DatasetProperties.EMPTY);
    Assert.assertTrue(dsFramework.hasInstance(MY_TABLE2));

    // --- update instances ---
    File initialDir = TMP_FOLDER.newFolder();
    DatasetProperties initialProps = FileSetProperties.builder()
        .setBasePath(initialDir.getPath())
        .setDataExternal(true)
        .build();
    dsFramework.addInstance(FileSet.class.getName(), MY_DS, initialProps);

    // Dropping the "external" flag would "internalize" external data, so this update must be rejected.
    try {
        dsFramework.updateInstance(MY_DS, DatasetProperties.EMPTY);
        Assert.fail("update should have thrown instance conflict");
    } catch (InstanceConflictException expected) {
        // the conflict is the outcome this test is looking for
    }

    // Only the external base path changes here, so this update must go through.
    File relocatedDir = TMP_FOLDER.newFolder();
    DatasetProperties relocatedProps = FileSetProperties.builder()
        .setBasePath(relocatedDir.getPath())
        .setDataExternal(true)
        .build();
    dsFramework.updateInstance(MY_DS, relocatedProps);
    DatasetSpecification fileSetSpec = dsFramework.getDatasetSpec(MY_DS);
    Assert.assertNotNull(fileSetSpec);
    Assert.assertEquals(relocatedDir.getPath(), FileSetProperties.getBasePath(fileSetSpec.getProperties()));

    // --- cleanup ---
    // Modules cannot be removed while instances of their types still exist.
    try {
        dsFramework.deleteAllModules(NAMESPACE_ID);
        Assert.fail("should not delete modules: there are datasets using their types");
    } catch (DatasetManagementException expected) {
        // the refusal is the outcome this test is looking for
    }
    // The failed delete must have left the types in place.
    Assert.assertTrue(dsFramework.hasType(tableTypeId));
    Assert.assertTrue(dsFramework.hasType(SIMPLE_KV_TYPE));

    dsFramework.deleteAllInstances(NAMESPACE_ID);
    Assert.assertEquals(0, dsFramework.getInstances(NAMESPACE_ID).size());
    Assert.assertFalse(dsFramework.hasInstance(MY_TABLE));
    Assert.assertNull(dsFramework.getDatasetSpec(MY_TABLE));
    Assert.assertFalse(dsFramework.hasInstance(MY_TABLE2));
    Assert.assertNull(dsFramework.getDatasetSpec(MY_TABLE2));

    // With no instances left, removing the modules is expected to succeed.
    dsFramework.deleteAllModules(NAMESPACE_ID);
    Assert.assertTrue(dsFramework.hasSystemType(tableTypeName));
    Assert.assertFalse(dsFramework.hasType(tableTypeId));
    Assert.assertFalse(dsFramework.hasType(SIMPLE_KV_TYPE));
}
Also used : DatasetTypeId(co.cask.cdap.proto.id.DatasetTypeId) Table(co.cask.cdap.api.dataset.table.Table) FileSet(co.cask.cdap.api.dataset.lib.FileSet) DatasetSpecification(co.cask.cdap.api.dataset.DatasetSpecification) LineageWriterDatasetFramework(co.cask.cdap.data2.metadata.writer.LineageWriterDatasetFramework) InMemoryTableModule(co.cask.cdap.data2.dataset2.module.lib.inmemory.InMemoryTableModule) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) InstanceConflictException(co.cask.cdap.api.dataset.InstanceConflictException) CoreDatasetsModule(co.cask.cdap.data2.dataset2.lib.table.CoreDatasetsModule) PartitionedFileSetModule(co.cask.cdap.data2.dataset2.lib.partitioned.PartitionedFileSetModule) FileSetModule(co.cask.cdap.data2.dataset2.lib.file.FileSetModule) File(java.io.File) Test(org.junit.Test)

Example 2 with FileSetModule

use of co.cask.cdap.data2.dataset2.lib.file.FileSetModule in project cdap by caskdata.

the class SystemDatasetRuntimeModule method bindDefaultModules.

/**
 * Registers the dataset modules that are available by default.
 *
 * @param mapBinder binder that receives one named binding per default module
 */
private void bindDefaultModules(MapBinder<String, DatasetModule> mapBinder) {
    // Insertion-ordered so the bindings are added in the same sequence as they are listed here.
    java.util.Map<String, DatasetModule> defaultModules = new java.util.LinkedHashMap<String, DatasetModule>();
    defaultModules.put("core", new CoreDatasetsModule());
    defaultModules.put("fileSet", new FileSetModule());
    defaultModules.put("timePartitionedFileSet", new TimePartitionedFileSetModule());
    defaultModules.put("partitionedFileSet", new PartitionedFileSetModule());
    defaultModules.put("objectMappedTable", new ObjectMappedTableModule());
    defaultModules.put("cube", new CubeModule());
    defaultModules.put("usage", new UsageDatasetModule());
    defaultModules.put("metadata", new MetadataDatasetModule());
    defaultModules.put("lineage", new LineageDatasetModule());
    defaultModules.put("externalDataset", new ExternalDatasetModule());

    for (java.util.Map.Entry<String, DatasetModule> entry : defaultModules.entrySet()) {
        mapBinder.addBinding(entry.getKey()).toInstance(entry.getValue());
    }
}
Also used : MetadataDatasetModule(co.cask.cdap.data2.metadata.dataset.MetadataDatasetModule) LineageDatasetModule(co.cask.cdap.data2.metadata.lineage.LineageDatasetModule) ExternalDatasetModule(co.cask.cdap.data2.dataset2.lib.external.ExternalDatasetModule) TimePartitionedFileSetModule(co.cask.cdap.data2.dataset2.lib.partitioned.TimePartitionedFileSetModule) ObjectMappedTableModule(co.cask.cdap.data2.dataset2.lib.table.ObjectMappedTableModule) CoreDatasetsModule(co.cask.cdap.data2.dataset2.lib.table.CoreDatasetsModule) UsageDatasetModule(co.cask.cdap.data2.registry.UsageDatasetModule) CubeModule(co.cask.cdap.data2.dataset2.lib.table.CubeModule) PartitionedFileSetModule(co.cask.cdap.data2.dataset2.lib.partitioned.PartitionedFileSetModule) FileSetModule(co.cask.cdap.data2.dataset2.lib.file.FileSetModule)

Example 3 with FileSetModule

use of co.cask.cdap.data2.dataset2.lib.file.FileSetModule in project cdap by caskdata.

the class AbstractDatasetFrameworkTest method testMultipleTransitiveDependencies.

/**
 * Registers a chain of modules ending in one for {@code EmbedsTableTwiceDataset},
 * creates an instance of that type and checks that the dataset can be obtained.
 * Instances and modules are removed afterwards regardless of the outcome.
 */
@Test
public void testMultipleTransitiveDependencies() throws DatasetManagementException, IOException {
    final DatasetFramework dsFramework = getFramework();
    try {
        // Register the modules.
        dsFramework.addModule(IN_MEMORY, new InMemoryTableModule());
        dsFramework.addModule(CORE, new CoreDatasetsModule());
        dsFramework.addModule(FILE, new FileSetModule());
        dsFramework.addModule(PFS, new PartitionedFileSetModule());
        dsFramework.addModule(TWICE, new SingleTypeModule(EmbedsTableTwiceDataset.class));

        // Create an instance partitioned on a single string field "x".
        Partitioning partitioning = Partitioning.builder()
            .addStringField("x")
            .build();
        DatasetProperties instanceProps = PartitionedFileSetProperties.builder()
            .setPartitioning(partitioning)
            .build();
        dsFramework.addInstance(EmbedsTableTwiceDataset.class.getName(), MY_DS, instanceProps);
        Assert.assertTrue(dsFramework.hasInstance(MY_DS));

        // Obtaining the dataset must complete without throwing; the result itself is not inspected.
        dsFramework.getDataset(MY_DS, DatasetProperties.EMPTY.getProperties(), null);
    } finally {
        // Instances go first, then the modules.
        dsFramework.deleteAllInstances(NAMESPACE_ID);
        dsFramework.deleteAllModules(NAMESPACE_ID);
    }
}
Also used : LineageWriterDatasetFramework(co.cask.cdap.data2.metadata.writer.LineageWriterDatasetFramework) InMemoryTableModule(co.cask.cdap.data2.dataset2.module.lib.inmemory.InMemoryTableModule) CoreDatasetsModule(co.cask.cdap.data2.dataset2.lib.table.CoreDatasetsModule) PartitionedFileSetModule(co.cask.cdap.data2.dataset2.lib.partitioned.PartitionedFileSetModule) FileSetModule(co.cask.cdap.data2.dataset2.lib.file.FileSetModule) Test(org.junit.Test)

Example 4 with FileSetModule

use of co.cask.cdap.data2.dataset2.lib.file.FileSetModule in project cdap by caskdata.

the class DataMigration method createRegisteredDatasetFramework.

/**
 * Creates an {@link InMemoryDatasetFramework} for standalone usage and registers the
 * "table", "metricsTable", "core" and "fileSet" modules in the system namespace.
 * NOTE: should NOT be used by applications!!!
 *
 * @param injector injector used to look up the {@link DatasetDefinitionRegistryFactory}
 * @return the framework with the modules above registered
 */
public static DatasetFramework createRegisteredDatasetFramework(Injector injector) throws DatasetManagementException, IOException {
    DatasetFramework framework =
        new InMemoryDatasetFramework(injector.getInstance(DatasetDefinitionRegistryFactory.class));

    final NamespaceId systemNamespace = NamespaceId.SYSTEM;
    // TODO: registering these modules by hand doesn't sound right; find out why it is needed.
    framework.addModule(systemNamespace.datasetModule("table"), new HBaseTableModule());
    framework.addModule(systemNamespace.datasetModule("metricsTable"), new HBaseMetricsTableModule());
    framework.addModule(systemNamespace.datasetModule("core"), new CoreDatasetsModule());
    framework.addModule(systemNamespace.datasetModule("fileSet"), new FileSetModule());
    return framework;
}
Also used : DatasetFramework(co.cask.cdap.data2.dataset2.DatasetFramework) InMemoryDatasetFramework(co.cask.cdap.data2.dataset2.InMemoryDatasetFramework) DatasetDefinitionRegistryFactory(co.cask.cdap.data2.dataset2.DatasetDefinitionRegistryFactory) CoreDatasetsModule(co.cask.cdap.data2.dataset2.lib.table.CoreDatasetsModule) HBaseMetricsTableModule(co.cask.cdap.data2.dataset2.module.lib.hbase.HBaseMetricsTableModule) HBaseTableModule(co.cask.cdap.data2.dataset2.module.lib.hbase.HBaseTableModule) FileSetModule(co.cask.cdap.data2.dataset2.lib.file.FileSetModule)

Aggregations

FileSetModule (co.cask.cdap.data2.dataset2.lib.file.FileSetModule)4 CoreDatasetsModule (co.cask.cdap.data2.dataset2.lib.table.CoreDatasetsModule)4 PartitionedFileSetModule (co.cask.cdap.data2.dataset2.lib.partitioned.PartitionedFileSetModule)3 InMemoryTableModule (co.cask.cdap.data2.dataset2.module.lib.inmemory.InMemoryTableModule)2 LineageWriterDatasetFramework (co.cask.cdap.data2.metadata.writer.LineageWriterDatasetFramework)2 Test (org.junit.Test)2 DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException)1 DatasetSpecification (co.cask.cdap.api.dataset.DatasetSpecification)1 InstanceConflictException (co.cask.cdap.api.dataset.InstanceConflictException)1 FileSet (co.cask.cdap.api.dataset.lib.FileSet)1 Table (co.cask.cdap.api.dataset.table.Table)1 DatasetDefinitionRegistryFactory (co.cask.cdap.data2.dataset2.DatasetDefinitionRegistryFactory)1 DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework)1 InMemoryDatasetFramework (co.cask.cdap.data2.dataset2.InMemoryDatasetFramework)1 ExternalDatasetModule (co.cask.cdap.data2.dataset2.lib.external.ExternalDatasetModule)1 TimePartitionedFileSetModule (co.cask.cdap.data2.dataset2.lib.partitioned.TimePartitionedFileSetModule)1 CubeModule (co.cask.cdap.data2.dataset2.lib.table.CubeModule)1 ObjectMappedTableModule (co.cask.cdap.data2.dataset2.lib.table.ObjectMappedTableModule)1 HBaseMetricsTableModule (co.cask.cdap.data2.dataset2.module.lib.hbase.HBaseMetricsTableModule)1 HBaseTableModule (co.cask.cdap.data2.dataset2.module.lib.hbase.HBaseTableModule)1