Use of io.cdap.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.
From the class SparkTest, method testDatasetSQL:
@Test
@Ignore("For this to work in Spark 2 and later, DefaultSource should implement " +
        "org.apache.spark.sql.execution.datasources.FileFormat")
public void testDatasetSQL() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);

  // Populate the ObjectMappedTable with three Person records
  DataSetManager<ObjectMappedTable<Person>> tableManager = getDataset("PersonTable");
  ObjectMappedTable<Person> table = tableManager.get();
  table.write("1", new Person("Bob", 10));
  table.write("2", new Person("Bill", 20));
  table.write("3", new Person("Berry", 30));
  tableManager.flush();

  SparkManager sparkManager = appManager.getSparkManager(DatasetSQLSpark.class.getSimpleName());
  sparkManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  // The program executes "SELECT * FROM Person WHERE age > 10", so we expect two new
  // entries, one for Bill and one for Berry
  tableManager.flush();
  Person person = table.read("new:2");
  Assert.assertEquals("Bill", person.name());
  Assert.assertEquals(20, person.age());

  person = table.read("new:3");
  Assert.assertEquals("Berry", person.name());
  Assert.assertEquals(30, person.age());

  // Bob (age 10) does not match the filter, so there should be no new entry for him
  Assert.assertNull(table.read("new:1"));
}
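The DatasetSQLSpark program itself is not shown on this page. As a minimal sketch of just the SQL step, assuming a plain Spark 2 session and a hypothetical JavaBean Person (the real test's Person uses name()/age() accessors and is read through the CDAP dataset integration rather than an in-memory list), the filter could be expressed like this:

import java.util.Arrays;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class PersonSqlSketch {

  // Hypothetical JavaBean; createDataFrame requires getter/setter-style accessors
  public static class Person implements java.io.Serializable {
    private String name;
    private int age;
    public Person() { }
    public Person(String name, int age) { this.name = name; this.age = age; }
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
    public int getAge() { return age; }
    public void setAge(int age) { this.age = age; }
  }

  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
      .appName("PersonSQL").master("local[*]").getOrCreate();
    // The real program would read the "PersonTable" ObjectMappedTable through CDAP;
    // an in-memory list is substituted here to keep the sketch self-contained
    Dataset<Row> people = spark.createDataFrame(
        Arrays.asList(new Person("Bob", 10), new Person("Bill", 20), new Person("Berry", 30)),
        Person.class);
    people.createOrReplaceTempView("Person");
    // Same filter as in the test: only Bill and Berry (age > 10) survive
    spark.sql("SELECT * FROM Person WHERE age > 10").show();
    spark.stop();
  }
}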
Use of io.cdap.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.
From the class HiveExploreStructuredRecordTestRun, method testObjectMappedTable:
@Test
public void testObjectMappedTable() throws Exception {
  // Add an ObjectMappedTable instance, exposing the row key as a string column named "id"
  final DatasetId datasetId = NAMESPACE_ID.dataset("person");
  datasetFramework.addInstance(ObjectMappedTable.class.getName(), datasetId,
                               ObjectMappedTableProperties.builder()
                                 .setType(Person.class)
                                 .setRowKeyExploreName("id")
                                 .setRowKeyExploreType(Schema.Type.STRING)
                                 .build());

  // Insert data using SQL
  String command = String.format(
    "INSERT into %s (id, firstname, lastname, age) VALUES (\"%s\", \"%s\", \"%s\", %d)",
    getDatasetHiveName(datasetId), "bobby", "Bobby", "Bob", 15);
  ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
  Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());

  transactional.execute(new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      // Read the data back through the dataset API directly
      ObjectMappedTable<Person> objTable = context.getDataset(datasetId.getDataset());
      Person person = objTable.read("bobby");
      Assert.assertNotNull(person);
      Assert.assertEquals("Bobby", person.getFirstName());
      Assert.assertEquals("Bob", person.getLastName());
      Assert.assertEquals(15, person.getAge());
    }
  });

  // Delete the dataset, which also drops the Hive table
  datasetFramework.deleteInstance(datasetId);
}
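For reference, the Person type this test maps must expose fields matching the Hive columns (firstname, lastname, age) and the getters used in the assertions. A minimal sketch, assuming a simple POJO (the actual class in the CDAP test suite may differ, e.g. in constructors or annotations):

public class Person {
  private String firstname;
  private String lastname;
  private int age;

  // A no-arg constructor is assumed here so instances can be reconstructed reflectively
  public Person() { }

  public Person(String firstname, String lastname, int age) {
    this.firstname = firstname;
    this.lastname = lastname;
    this.age = age;
  }

  public String getFirstName() { return firstname; }
  public String getLastName() { return lastname; }
  public int getAge() { return age; }
}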
Use of io.cdap.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.
From the class ExploreTableManager, method generateEnableStatement:
/**
 * Generate a Hive DDL statement to create a Hive table for the given dataset.
 *
 * @param dataset the instantiated dataset
 * @param spec the dataset specification
 * @param datasetId the dataset id
 * @param tableName the name of the Hive table to create
 * @param truncating whether this call to create() is part of a truncate() operation, which is in some
 *                   cases implemented using disableExplore() followed by enableExplore()
 *
 * @return a CREATE TABLE statement, or null if the dataset is not explorable
 * @throws UnsupportedTypeException if the dataset is a RecordScannable of a type that is not supported by Hive
 */
@Nullable
private String generateEnableStatement(Dataset dataset, DatasetSpecification spec, DatasetId datasetId,
                                       String tableName, boolean truncating)
  throws UnsupportedTypeException, ExploreException {

  String datasetName = datasetId.getDataset();
  Map<String, String> serdeProperties = ImmutableMap.of(
    Constants.Explore.DATASET_NAME, datasetId.getDataset(),
    Constants.Explore.DATASET_NAMESPACE, datasetId.getNamespace());

  // To be explorable, a dataset must be a Table, an ObjectMappedTable, a RecordScannable or
  // RecordWritable, or it must be a FileSet or a PartitionedFileSet with explore enabled in
  // its properties.
  if (dataset instanceof Table) {
    // It is valid for a table not to have a schema property; this logic should really be in Table
    return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, false);
  }
  if (dataset instanceof ObjectMappedTable) {
    return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, true);
  }

  boolean isRecordScannable = dataset instanceof RecordScannable;
  boolean isRecordWritable = dataset instanceof RecordWritable;
  if (isRecordScannable || isRecordWritable) {
    Type recordType = isRecordScannable
      ? ((RecordScannable) dataset).getRecordType()
      : ((RecordWritable) dataset).getRecordType();

    // Use == rather than instanceof: the record type must be exactly StructuredRecord
    if (StructuredRecord.class == recordType) {
      return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, true);
    }

    // otherwise, derive the schema from the record type
    LOG.debug("Enabling explore for dataset instance {}", datasetName);
    String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
    return new CreateStatementBuilder(datasetName, databaseName, tableName, shouldEscapeColumns)
      .setSchema(hiveSchemaFor(recordType))
      .setTableComment("CDAP Dataset")
      .buildWithStorageHandler(DatasetStorageHandler.class.getName(), serdeProperties);
  } else if (dataset instanceof FileSet || dataset instanceof PartitionedFileSet) {
    Map<String, String> properties = spec.getProperties();
    if (FileSetProperties.isExploreEnabled(properties)) {
      LOG.debug("Enabling explore for dataset instance {}", datasetName);
      return generateFileSetCreateStatement(datasetId, dataset, properties, truncating);
    }
  }

  // dataset is not explorable
  return null;
}
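To make the result concrete: for a RecordScannable dataset whose record type has, say, a string field and an int field, the built statement has roughly the shape below. This is an illustration only; the exact table name, escaping, and SERDEPROPERTIES keys (the 'explore.dataset.name' and 'explore.dataset.namespace' values are assumptions here) come from CreateStatementBuilder and Constants.Explore.

// Hypothetical output of generateEnableStatement() for a record type with
// fields "id" (string) and "age" (int); not verbatim CDAP output
String example =
  "CREATE TABLE IF NOT EXISTS dataset_people (id string, age int) " +
  "COMMENT 'CDAP Dataset' " +
  "STORED BY '" + DatasetStorageHandler.class.getName() + "' " +
  "WITH SERDEPROPERTIES ('explore.dataset.name'='people', " +
  "'explore.dataset.namespace'='default')";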
Use of io.cdap.cdap.api.dataset.lib.ObjectMappedTable in project cdap by caskdata.
From the class HiveExploreObjectMappedTableTestRun, method setupTable:
private void setupTable(@Nullable String dbName, @Nullable String tableName) throws Exception {
  if (dbName != null) {
    runCommand(NAMESPACE_ID, "create database if not exists " + dbName, false, null, null);
  }
  datasetFramework.addInstance(ObjectMappedTable.class.getName(), MY_TABLE,
                               setupProperties(dbName, tableName, "row_key"));

  // Access the dataset instance to perform data operations
  ObjectMappedTable<Record> table = datasetFramework.getDataset(MY_TABLE, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(table);

  // Drive the transaction lifecycle manually: start a transaction, write, then commit
  TransactionAware txTable = (TransactionAware) table;
  Transaction tx1 = transactionManager.startShort(100);
  txTable.startTx(tx1);

  record1 = new Record(123, 1234567890L, 3.14159f, 3.1415926535, "foobar", new byte[] { 1, 2, 3 });
  record2 = new Record(-987, 9876543210L, 2.71f, 2.71112384, "hello world", new byte[] { 4, 5, 6 });
  table.write("123", record1);
  table.write("456", record2);

  // Persist buffered changes, check for conflicts, make the transaction visible, and clean up
  Assert.assertTrue(txTable.commitTx());
  transactionManager.canCommit(tx1.getTransactionId(), txTable.getTxChanges());
  transactionManager.commit(tx1.getTransactionId(), tx1.getWritePointer());
  txTable.postTxCommit();
}
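The Record type is not shown on this page; from the constructor calls above, its fields are an int, a long, a float, a double, a String, and a byte[]. A minimal sketch, with field names chosen purely for illustration (the actual class in the CDAP test suite may differ):

public class Record {
  private int intField;
  private long longField;
  private float floatField;
  private double doubleField;
  private String stringField;
  private byte[] bytesField;

  // A no-arg constructor is assumed here so instances can be reconstructed reflectively
  public Record() { }

  public Record(int intField, long longField, float floatField,
                double doubleField, String stringField, byte[] bytesField) {
    this.intField = intField;
    this.longField = longField;
    this.floatField = floatField;
    this.doubleField = doubleField;
    this.stringField = stringField;
    this.bytesField = bytesField;
  }
}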