Example 1 with ObjectMappedTable

Use of io.cdap.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.

From the class SparkTest, method testDatasetSQL.

@Test
@Ignore("For this to work in Spark 2 and later, DefaultSource should implement " + "org.apache.spark.sql.execution.datasources.FileFormat")
public void testDatasetSQL() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);
    DataSetManager<ObjectMappedTable<Person>> tableManager = getDataset("PersonTable");
    ObjectMappedTable<Person> table = tableManager.get();
    table.write("1", new Person("Bob", 10));
    table.write("2", new Person("Bill", 20));
    table.write("3", new Person("Berry", 30));
    tableManager.flush();
    SparkManager sparkManager = appManager.getSparkManager(DatasetSQLSpark.class.getSimpleName());
    sparkManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    // The program executes "SELECT * FROM Person WHERE age > 10", so we expect two new entries, for Bill and Berry.
    tableManager.flush();
    Person person = table.read("new:2");
    Assert.assertEquals("Bill", person.name());
    Assert.assertEquals(20, person.age());
    person = table.read("new:3");
    Assert.assertEquals("Berry", person.name());
    Assert.assertEquals(30, person.age());
    // Bob (age 10) does not pass the age > 10 filter, so there should be no new entry for him
    Assert.assertNull(table.read("new:1"));
}
Also used: ApplicationManager (io.cdap.cdap.test.ApplicationManager), SparkManager (io.cdap.cdap.test.SparkManager), ObjectMappedTable (io.cdap.cdap.api.dataset.lib.ObjectMappedTable), Person (io.cdap.cdap.spark.app.Person), DatasetSQLSpark (io.cdap.cdap.spark.app.DatasetSQLSpark), Ignore (org.junit.Ignore), Test (org.junit.Test)
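For reference, Example 1 assumes a Person value class with name() and age() accessors, matching the calls made in the test. A minimal sketch of such a class follows; the actual io.cdap.cdap.spark.app.Person may differ in details, so treat the field layout as an assumption.

// Hypothetical sketch of the Person value class used in Example 1.
// Only the constructor and accessors exercised by the test are shown.
public class Person {
    private final String name;
    private final int age;

    public Person(String name, int age) {
        this.name = name;
        this.age = age;
    }

    public String name() {
        return name;
    }

    public int age() {
        return age;
    }
}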

Example 2 with ObjectMappedTable

Use of io.cdap.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.

From the class HiveExploreStructuredRecordTestRun, method testObjectMappedTable.

@Test
public void testObjectMappedTable() throws Exception {
    // Add an ObjectMappedTable instance
    final DatasetId datasetId = NAMESPACE_ID.dataset("person");
    datasetFramework.addInstance(ObjectMappedTable.class.getName(), datasetId, ObjectMappedTableProperties.builder().setType(Person.class).setRowKeyExploreName("id").setRowKeyExploreType(Schema.Type.STRING).build());
    // Insert data using SQL
    String command = String.format("INSERT INTO %s (id, firstname, lastname, age) VALUES (\"%s\", \"%s\", \"%s\", %d)", getDatasetHiveName(datasetId), "bobby", "Bobby", "Bob", 15);
    ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
    Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
    transactional.execute(new TxRunnable() {

        @Override
        public void run(DatasetContext context) throws Exception {
            // Read the data back via dataset directly
            ObjectMappedTable<Person> objTable = context.getDataset(datasetId.getDataset());
            Person person = objTable.read("bobby");
            Assert.assertNotNull(person);
            Assert.assertEquals("Bobby", person.getFirstName());
            Assert.assertEquals("Bob", person.getLastName());
            Assert.assertEquals(15, person.getAge());
        }
    });
    // Delete the dataset, which also drops the Hive table.
    datasetFramework.deleteInstance(datasetId);
}
Also used: TxRunnable (io.cdap.cdap.api.TxRunnable), ObjectMappedTable (io.cdap.cdap.api.dataset.lib.ObjectMappedTable), DatasetContext (io.cdap.cdap.api.data.DatasetContext), ExploreExecutionResult (io.cdap.cdap.explore.client.ExploreExecutionResult), DatasetId (io.cdap.cdap.proto.id.DatasetId), Test (org.junit.Test)
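The Hive columns addressed by the INSERT statement (id, firstname, lastname, age) come from two places: the row key is exposed as the column configured via setRowKeyExploreName("id"), and the remaining columns are derived from the fields of the Person type. Below is a minimal sketch of a compatible Person bean; the real class used by this test is not shown above, so its exact shape is an assumption.

// Hypothetical sketch of the Person bean assumed by Example 2.
// ObjectMappedTable maps each field to a table column, so firstName,
// lastName and age become the Hive columns firstname, lastname and age.
public class Person {
    private final String firstName;
    private final String lastName;
    private final int age;

    public Person(String firstName, String lastName, int age) {
        this.firstName = firstName;
        this.lastName = lastName;
        this.age = age;
    }

    public String getFirstName() {
        return firstName;
    }

    public String getLastName() {
        return lastName;
    }

    public int getAge() {
        return age;
    }
}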

Example 3 with ObjectMappedTable

Use of io.cdap.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.

From the class ExploreTableManager, method generateEnableStatement.

/**
 * Generate a Hive DDL statement to create a Hive table for the given dataset.
 *
 * @param dataset the instantiated dataset
 * @param spec the dataset specification
 * @param datasetId the dataset id
 * @param tableName the name of the Hive table to create
 * @param truncating whether this call to create() is part of a truncate() operation, which is in some
 *                   cases implemented using disableExplore() followed by enableExplore()
 *
 * @return a CREATE TABLE statement, or null if the dataset is not explorable
 * @throws UnsupportedTypeException if the dataset is a RecordScannable of a type that is not supported by Hive
 */
@Nullable
private String generateEnableStatement(Dataset dataset, DatasetSpecification spec, DatasetId datasetId, String tableName, boolean truncating) throws UnsupportedTypeException, ExploreException {
    String datasetName = datasetId.getDataset();
    Map<String, String> serdeProperties = ImmutableMap.of(Constants.Explore.DATASET_NAME, datasetId.getDataset(), Constants.Explore.DATASET_NAMESPACE, datasetId.getNamespace());
    // To be explorable, the dataset must be a Table, an ObjectMappedTable, or a RecordScannable/RecordWritable,
    // or it must be a FileSet or a PartitionedFileSet with explore enabled in its properties.
    if (dataset instanceof Table) {
        // It is valid for a table not to have a schema property; this logic should really be in Table
        return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, false);
    }
    if (dataset instanceof ObjectMappedTable) {
        return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, true);
    }
    boolean isRecordScannable = dataset instanceof RecordScannable;
    boolean isRecordWritable = dataset instanceof RecordWritable;
    if (isRecordScannable || isRecordWritable) {
        Type recordType = isRecordScannable ? ((RecordScannable) dataset).getRecordType() : ((RecordWritable) dataset).getRecordType();
        // Use == because we require exactly this class, not a subclass.
        if (StructuredRecord.class == recordType) {
            return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, true);
        }
        // otherwise, derive the schema from the record type
        LOG.debug("Enabling explore for dataset instance {}", datasetName);
        String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
        return new CreateStatementBuilder(datasetName, databaseName, tableName, shouldEscapeColumns).setSchema(hiveSchemaFor(recordType)).setTableComment("CDAP Dataset").buildWithStorageHandler(DatasetStorageHandler.class.getName(), serdeProperties);
    } else if (dataset instanceof FileSet || dataset instanceof PartitionedFileSet) {
        Map<String, String> properties = spec.getProperties();
        if (FileSetProperties.isExploreEnabled(properties)) {
            LOG.debug("Enabling explore for dataset instance {}", datasetName);
            return generateFileSetCreateStatement(datasetId, dataset, properties, truncating);
        }
    }
    // dataset is not explorable
    return null;
}
Also used: ObjectMappedTable (io.cdap.cdap.api.dataset.lib.ObjectMappedTable), Table (io.cdap.cdap.api.dataset.table.Table), RecordScannable (io.cdap.cdap.api.data.batch.RecordScannable), RecordWritable (io.cdap.cdap.api.data.batch.RecordWritable), FileSet (io.cdap.cdap.api.dataset.lib.FileSet), PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet), CreateStatementBuilder (io.cdap.cdap.explore.table.CreateStatementBuilder), DatasetStorageHandler (io.cdap.cdap.hive.datasets.DatasetStorageHandler), Type (java.lang.reflect.Type), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), Nullable (javax.annotation.Nullable)
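The FileSet branch above only returns a CREATE TABLE statement when FileSetProperties.isExploreEnabled(properties) is true, i.e. when explore was requested in the dataset's own properties. Below is a hedged sketch of how a dataset creator might set those properties; the builder method names are taken from the CDAP FileSetProperties API and should be verified against the CDAP version in use, and the format and schema values are illustrative assumptions.

// Sketch: configuring a FileSet so that FileSetProperties.isExploreEnabled(...)
// returns true and generateEnableStatement(...) emits a CREATE TABLE statement.
import io.cdap.cdap.api.dataset.DatasetProperties;
import io.cdap.cdap.api.dataset.lib.FileSetProperties;

public class ExploreEnabledFileSetProperties {
    public static DatasetProperties create() {
        return FileSetProperties.builder()
            .setBasePath("explore-demo")
            // Ask CDAP to create a backing Hive table for this FileSet
            .setEnableExploreOnCreate(true)
            // Tell Hive how to read the files and which columns to expose
            .setExploreFormat("csv")
            .setExploreSchema("id string, name string, age int")
            .build();
    }
}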

Example 4 with ObjectMappedTable

Use of io.cdap.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.

From the class HiveExploreObjectMappedTableTestRun, method setupTable.

private void setupTable(@Nullable String dbName, @Nullable String tableName) throws Exception {
    if (dbName != null) {
        runCommand(NAMESPACE_ID, "create database if not exists " + dbName, false, null, null);
    }
    datasetFramework.addInstance(ObjectMappedTable.class.getName(), MY_TABLE, setupProperties(dbName, tableName, "row_key"));
    // Access the dataset instance directly to perform data operations
    ObjectMappedTable<Record> table = datasetFramework.getDataset(MY_TABLE, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    TransactionAware txTable = (TransactionAware) table;
    Transaction tx1 = transactionManager.startShort(100);
    txTable.startTx(tx1);
    record1 = new Record(123, 1234567890L, 3.14159f, 3.1415926535, "foobar", new byte[] { 1, 2, 3 });
    record2 = new Record(-987, 9876543210L, 2.71f, 2.71112384, "hello world", new byte[] { 4, 5, 6 });
    table.write("123", record1);
    table.write("456", record2);
    Assert.assertTrue(txTable.commitTx());
    transactionManager.canCommit(tx1.getTransactionId(), txTable.getTxChanges());
    transactionManager.commit(tx1.getTransactionId(), tx1.getWritePointer());
    txTable.postTxCommit();
}
Also used: Transaction (org.apache.tephra.Transaction), TransactionAware (org.apache.tephra.TransactionAware), Record (io.cdap.cdap.explore.service.datasets.Record), ObjectMappedTable (io.cdap.cdap.api.dataset.lib.ObjectMappedTable)
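Example 4 drives the Tephra transaction lifecycle by hand (startTx, commitTx, canCommit, commit, postTxCommit), which is necessary in tests that run outside a CDAP-managed context. Inside a program, the same writes would normally go through the Transactional API shown in Example 2. A minimal sketch, assuming a Transactional instance named transactional is available and the test's record1 and record2 are in scope; the dataset name "my_table" is an illustrative assumption:

// Sketch: the same two writes under a managed transaction, using the
// TxRunnable pattern from Example 2 instead of manual Tephra calls.
transactional.execute(new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
        ObjectMappedTable<Record> table = context.getDataset("my_table");
        table.write("123", record1);
        table.write("456", record2);
    }
});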

Aggregations

ObjectMappedTable (io.cdap.cdap.api.dataset.lib.ObjectMappedTable): 4 uses
Test (org.junit.Test): 2 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 1 use
TxRunnable (io.cdap.cdap.api.TxRunnable): 1 use
DatasetContext (io.cdap.cdap.api.data.DatasetContext): 1 use
RecordScannable (io.cdap.cdap.api.data.batch.RecordScannable): 1 use
RecordWritable (io.cdap.cdap.api.data.batch.RecordWritable): 1 use
FileSet (io.cdap.cdap.api.dataset.lib.FileSet): 1 use
PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet): 1 use
Table (io.cdap.cdap.api.dataset.table.Table): 1 use
ExploreExecutionResult (io.cdap.cdap.explore.client.ExploreExecutionResult): 1 use
Record (io.cdap.cdap.explore.service.datasets.Record): 1 use
CreateStatementBuilder (io.cdap.cdap.explore.table.CreateStatementBuilder): 1 use
DatasetStorageHandler (io.cdap.cdap.hive.datasets.DatasetStorageHandler): 1 use
DatasetId (io.cdap.cdap.proto.id.DatasetId): 1 use
DatasetSQLSpark (io.cdap.cdap.spark.app.DatasetSQLSpark): 1 use
Person (io.cdap.cdap.spark.app.Person): 1 use
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 1 use
SparkManager (io.cdap.cdap.test.SparkManager): 1 use
Type (java.lang.reflect.Type): 1 use