Example 1 with ObjectMappedTable

Use of co.cask.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.

From the class HiveExploreObjectMappedTableTestRun, method setupTable:

private void setupTable(@Nullable String dbName, @Nullable String tableName) throws Exception {
    if (dbName != null) {
        runCommand(NAMESPACE_ID, "create database if not exists " + dbName, false, null, null);
    }
    datasetFramework.addInstance(ObjectMappedTable.class.getName(), MY_TABLE,
                                 setupProperties(dbName, tableName, "row_key"));
    // Accessing the dataset instance to perform data operations
    ObjectMappedTable<Record> table = datasetFramework.getDataset(MY_TABLE, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    // The table is TransactionAware, so the test drives the Tephra transaction lifecycle explicitly
    TransactionAware txTable = (TransactionAware) table;
    Transaction tx1 = transactionManager.startShort(100);
    txTable.startTx(tx1);
    record1 = new Record(123, 1234567890L, 3.14159f, 3.1415926535, "foobar", new byte[] { 1, 2, 3 });
    record2 = new Record(-987, 9876543210L, 2.71f, 2.71112384, "hello world", new byte[] { 4, 5, 6 });
    table.write("123", record1);
    table.write("456", record2);
    // Persist the buffered writes, verify there are no conflicts, then commit and clean up
    Assert.assertTrue(txTable.commitTx());
    Assert.assertTrue(transactionManager.canCommit(tx1, txTable.getTxChanges()));
    transactionManager.commit(tx1);
    txTable.postTxCommit();
}
Also used: Transaction (org.apache.tephra.Transaction), TransactionAware (org.apache.tephra.TransactionAware), Record (co.cask.cdap.explore.service.datasets.Record), ObjectMappedTable (co.cask.cdap.api.dataset.lib.ObjectMappedTable)
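
The test drives the Tephra commit protocol by hand so the writes become visible to later operations. In application code the same lifecycle is normally wrapped in Tephra's TransactionContext. A minimal sketch, assuming a TransactionSystemClient named txClient and reusing the table and record variables from the test above (TransactionContext and TransactionFailureException are org.apache.tephra classes):

TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) table);
// start() begins a new transaction and calls startTx() on each registered TransactionAware
txContext.start();
try {
    table.write("123", record1);
    table.write("456", record2);
    // finish() runs the same persist / conflict-check / commit / postTxCommit sequence
    txContext.finish();
} catch (TransactionFailureException e) {
    // abort() rolls back the buffered writes and aborts the transaction
    txContext.abort();
    throw e;
}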

Example 2 with ObjectMappedTable

Use of co.cask.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.

From the class ExploreTableManager, method generateEnableStatement:

/**
   * Generate a Hive DDL statement to create a Hive table for the given dataset.
   *
   * @param dataset the instantiated dataset
   * @param spec the dataset specification
   * @param datasetId the dataset id
   * @param tableName the name of the Hive table to create
   * @param truncating whether this call to create() is part of a truncate() operation, which is in some
   *                   cases implemented using disableExplore() followed by enableExplore()
   *
   * @return a CREATE TABLE statement, or null if the dataset is not explorable
   * @throws UnsupportedTypeException if the dataset is a RecordScannable of a type that is not supported by Hive
   */
@Nullable
private String generateEnableStatement(Dataset dataset, DatasetSpecification spec, DatasetId datasetId,
                                       String tableName, boolean truncating)
    throws UnsupportedTypeException, ExploreException {
    String datasetName = datasetId.getDataset();
    Map<String, String> serdeProperties = ImmutableMap.of(
        Constants.Explore.DATASET_NAME, datasetId.getDataset(),
        Constants.Explore.DATASET_NAMESPACE, datasetId.getNamespace());
    // To be explorable, the dataset must be a Table, an ObjectMappedTable, a RecordScannable or a RecordWritable,
    // or it must be a FileSet or a PartitionedFileSet with explore enabled in its properties.
    if (dataset instanceof Table) {
        // It is valid for a table not to have a schema property; this logic should really be in Table.
        return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, false);
    }
    if (dataset instanceof ObjectMappedTable) {
        return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, true);
    }
    boolean isRecordScannable = dataset instanceof RecordScannable;
    boolean isRecordWritable = dataset instanceof RecordWritable;
    if (isRecordScannable || isRecordWritable) {
        Type recordType = isRecordScannable ? ((RecordScannable) dataset).getRecordType() : ((RecordWritable) dataset).getRecordType();
        // If the record type is StructuredRecord, create the table from the schema property.
        // Use == rather than instanceof because exactly the same class is required.
        if (StructuredRecord.class == recordType) {
            return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, true);
        }
        // otherwise, derive the schema from the record type
        LOG.debug("Enabling explore for dataset instance {}", datasetName);
        String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
        return new CreateStatementBuilder(datasetName, databaseName, tableName, shouldEscapeColumns)
            .setSchema(hiveSchemaFor(recordType))
            .setTableComment("CDAP Dataset")
            .buildWithStorageHandler(DatasetStorageHandler.class.getName(), serdeProperties);
    } else if (dataset instanceof FileSet || dataset instanceof PartitionedFileSet) {
        Map<String, String> properties = spec.getProperties();
        if (FileSetProperties.isExploreEnabled(properties)) {
            LOG.debug("Enabling explore for dataset instance {}", datasetName);
            return generateFileSetCreateStatement(datasetId, dataset, properties, truncating);
        }
    }
    // dataset is not explorable
    return null;
}
Also used: Table (co.cask.cdap.api.dataset.table.Table), ObjectMappedTable (co.cask.cdap.api.dataset.lib.ObjectMappedTable), RecordScannable (co.cask.cdap.api.data.batch.RecordScannable), RecordWritable (co.cask.cdap.api.data.batch.RecordWritable), FileSet (co.cask.cdap.api.dataset.lib.FileSet), PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet), CreateStatementBuilder (co.cask.cdap.explore.table.CreateStatementBuilder), DatasetStorageHandler (co.cask.cdap.hive.datasets.DatasetStorageHandler), Type (java.lang.reflect.Type), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), Nullable (javax.annotation.Nullable)
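
For an ObjectMappedTable, generateCreateStatementFromSchemaProperty derives the columns from the dataset's schema property. As a rough illustration only, a statement built with a storage handler has roughly the following shape; the table name, column list, and serde property keys below are assumptions for a Person-like schema, not output copied from CDAP:

// Illustrative only: approximate shape of the generated DDL for an ObjectMappedTable
// of Person objects. All names and property keys here are assumptions.
String exampleDdl =
    "CREATE EXTERNAL TABLE IF NOT EXISTS dataset_persontable " +
    "(row_key STRING, id STRING, name STRING, age INT) " +
    "COMMENT 'CDAP Dataset' " +
    "STORED BY 'co.cask.cdap.hive.datasets.DatasetStorageHandler' " +
    "WITH SERDEPROPERTIES ('explore.dataset.name'='PersonTable', " +
    "'explore.dataset.namespace'='default')";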

Example 3 with ObjectMappedTable

Use of co.cask.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.

From the class SparkTestRun, method testDatasetSQL:

@Test
public void testDatasetSQL() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);
    DataSetManager<ObjectMappedTable<Person>> tableManager = getDataset("PersonTable");
    ObjectMappedTable<Person> table = tableManager.get();
    table.write("1", new Person("1", "Bob", 10));
    table.write("2", new Person("2", "Bill", 20));
    table.write("3", new Person("3", "Berry", 30));
    tableManager.flush();
    SparkManager sparkManager = appManager.getSparkManager(DatasetSQLSpark.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    // The Spark program executes "SELECT * FROM Person WHERE age > 10" and writes each matching
    // record back under a "new:" key prefix, so two new entries are expected, for Bill and Berry.
    tableManager.flush();
    Person person = table.read("new:2");
    Assert.assertEquals("Bill", person.name());
    Assert.assertEquals(20, person.age());
    person = table.read("new:3");
    Assert.assertEquals("Berry", person.name());
    Assert.assertEquals(30, person.age());
    // Bob's age (10) does not satisfy age > 10, so there should be no new entry for him
    Assert.assertNull(table.read("new:1"));
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), SparkManager (co.cask.cdap.test.SparkManager), ObjectMappedTable (co.cask.cdap.api.dataset.lib.ObjectMappedTable), Person (co.cask.cdap.spark.app.Person), Test (org.junit.Test)
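
The test relies on a Person value class with name() and age() accessors and a three-argument constructor. The real co.cask.cdap.spark.app.Person is not shown on this page; a minimal sketch that is compatible with the calls above:

// Minimal sketch only; the actual co.cask.cdap.spark.app.Person may differ.
public class Person {
    private final String id;
    private final String name;
    private final int age;

    public Person(String id, String name, int age) {
        this.id = id;
        this.name = name;
        this.age = age;
    }

    public String id() {
        return id;
    }

    public String name() {
        return name;
    }

    public int age() {
        return age;
    }
}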

Aggregations

ObjectMappedTable (co.cask.cdap.api.dataset.lib.ObjectMappedTable): 3 uses
RecordScannable (co.cask.cdap.api.data.batch.RecordScannable): 1 use
RecordWritable (co.cask.cdap.api.data.batch.RecordWritable): 1 use
FileSet (co.cask.cdap.api.dataset.lib.FileSet): 1 use
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 1 use
Table (co.cask.cdap.api.dataset.table.Table): 1 use
Record (co.cask.cdap.explore.service.datasets.Record): 1 use
CreateStatementBuilder (co.cask.cdap.explore.table.CreateStatementBuilder): 1 use
DatasetStorageHandler (co.cask.cdap.hive.datasets.DatasetStorageHandler): 1 use
Person (co.cask.cdap.spark.app.Person): 1 use
ApplicationManager (co.cask.cdap.test.ApplicationManager): 1 use
SparkManager (co.cask.cdap.test.SparkManager): 1 use
ImmutableMap (com.google.common.collect.ImmutableMap): 1 use
Type (java.lang.reflect.Type): 1 use
Map (java.util.Map): 1 use
Nullable (javax.annotation.Nullable): 1 use
Transaction (org.apache.tephra.Transaction): 1 use
TransactionAware (org.apache.tephra.TransactionAware): 1 use
Test (org.junit.Test): 1 use
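
All three examples assume the ObjectMappedTable dataset already exists. For completeness, a minimal sketch of how such a dataset is typically declared in a CDAP application; "ExampleApp" and the reuse of the Person class are placeholders, not taken from this page:

import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.api.dataset.lib.ObjectMappedTable;
import co.cask.cdap.api.dataset.lib.ObjectMappedTableProperties;

// Minimal sketch of an application that declares an ObjectMappedTable of Person objects.
public class ExampleApp extends AbstractApplication {
    @Override
    public void configure() {
        setName("ExampleApp");
        try {
            // setType derives the table schema from the Person class via reflection
            createDataset("PersonTable", ObjectMappedTable.class,
                          ObjectMappedTableProperties.builder()
                              .setType(Person.class)
                              .build());
        } catch (UnsupportedTypeException e) {
            // setType rejects types that cannot be mapped to a table schema
            throw new RuntimeException(e);
        }
    }
}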