Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.
The class IndexedObjectStoreDefinition, method getDataset:
@Override
public IndexedObjectStore<?> getDataset(DatasetContext datasetContext, DatasetSpecification spec,
                                        Map<String, String> arguments, ClassLoader classLoader) throws IOException {
  // Resolve the two embedded datasets that back the composite: the "index" Table and the "data" ObjectStore.
  Table index = getDataset(datasetContext, "index", spec, arguments, classLoader);
  ObjectStore<?> objectStore = getDataset(datasetContext, "data", spec, arguments, classLoader);
  return new IndexedObjectStore<>(spec.getName(), objectStore, index);
}
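The getDataset method above resolves two embedded datasets by the names "index" and "data". For orientation, here is a hedged sketch (not the verbatim CDAP source) of what the configure() counterpart that creates those embedded specifications typically looks like; the fields tableDef and objectStoreDef are assumed to be the delegate definitions passed to the constructor.

@Override
public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
  // Sketch: build a composite specification whose embedded datasets carry the names
  // ("index", "data") that getDataset() looks up above.
  return DatasetSpecification.builder(instanceName, getName())
    .properties(properties.getProperties())
    .datasets(tableDef.configure("index", properties),
              objectStoreDef.configure("data", properties))
    .build();
}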
Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.
The class CoreDatasetsModule, method register:
@Override
public void register(DatasetDefinitionRegistry registry) {
  DatasetDefinition<Table, DatasetAdmin> tableDef = registry.get("table");
  // Each definition is registered twice: under its short type name and under its full class name.
  DatasetDefinition<KeyValueTable, DatasetAdmin> kvTableDef = new KeyValueTableDefinition(KeyValueTable.TYPE, tableDef);
  registry.add(kvTableDef);
  registry.add(new KeyValueTableDefinition(KeyValueTable.class.getName(), tableDef));
  DatasetDefinition<ObjectStore, DatasetAdmin> objectStoreDef = new ObjectStoreDefinition(ObjectStore.TYPE, kvTableDef);
  registry.add(new ObjectStoreDefinition(ObjectStore.TYPE, kvTableDef));
  registry.add(new ObjectStoreDefinition(ObjectStore.class.getName(), kvTableDef));
  registry.add(new IndexedObjectStoreDefinition(IndexedObjectStore.TYPE, tableDef, objectStoreDef));
  registry.add(new IndexedObjectStoreDefinition(IndexedObjectStore.class.getName(), tableDef, objectStoreDef));
  registry.add(new IndexedTableDefinition(IndexedTable.TYPE, tableDef));
  registry.add(new IndexedTableDefinition(IndexedTable.class.getName(), tableDef));
  registry.add(new TimeseriesTableDefinition(TimeseriesTable.TYPE, tableDef));
  registry.add(new TimeseriesTableDefinition(TimeseriesTable.class.getName(), tableDef));
  registry.add(new CounterTimeseriesTableDefinition(CounterTimeseriesTable.TYPE, tableDef));
  registry.add(new CounterTimeseriesTableDefinition(CounterTimeseriesTable.class.getName(), tableDef));
  // in-memory table
  registry.add(new InMemoryTableDefinition(InMemoryTable.TYPE));
}
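CoreDatasetsModule follows a common pattern: look up the low-level "table" definition from the registry and layer higher-level definitions on top of it, registering each under both its type name and its class name. Below is a minimal sketch of a custom module doing the same; MyDatasetDefinition and MyDataset are hypothetical names used only for illustration.

public class MyDatasetModule implements DatasetModule {
  @Override
  public void register(DatasetDefinitionRegistry registry) {
    // Reuse the already-registered Table definition as the building block.
    DatasetDefinition<Table, DatasetAdmin> tableDef = registry.get("table");
    // Hypothetical composite definition, registered under a type name and a class name.
    registry.add(new MyDatasetDefinition("myDataset", tableDef));
    registry.add(new MyDatasetDefinition(MyDataset.class.getName(), tableDef));
  }
}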
Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.
The class HiveExploreTableTestRun, method testNonAsciiStrings:
@Test
public void testNonAsciiStrings() throws Exception {
  DatasetId ttId = NAMESPACE_ID.dataset("tt");
  datasetFramework.addInstance(Table.class.getName(), ttId, TableProperties.builder()
    .setSchema(Schema.recordOf("record",
                               Schema.Field.of("a", Schema.of(Schema.Type.STRING)),
                               Schema.Field.of("b", Schema.of(Schema.Type.STRING))))
    .setRowFieldName("a")
    .setExploreTableName("tt")
    .build());
  try {
    // Accessing dataset instance to perform data operations
    Table tt = datasetFramework.getDataset(ttId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(tt);
    Transaction tx = transactionManager.startShort(100);
    ((TransactionAware) tt).startTx(tx);
    tt.put(new Put("a", "b", "c"));
    // row key and column value are non-ASCII
    tt.put(new Put("ä", "b", "ç"));
    ((TransactionAware) tt).commitTx();
    transactionManager.canCommit(tx.getTransactionId(), ((TransactionAware) tt).getTxChanges());
    transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
    ((TransactionAware) tt).postTxCommit();
    ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from tt").get();
    List<Object> columns = results.next().getColumns();
    Assert.assertEquals(2, columns.size());
    Assert.assertEquals("a", columns.get(0));
    Assert.assertEquals("c", columns.get(1));
    columns = results.next().getColumns();
    Assert.assertEquals(2, columns.size());
    Assert.assertEquals("ä", columns.get(0));
    Assert.assertEquals("ç", columns.get(1));
  } finally {
    datasetFramework.deleteInstance(ttId);
  }
}
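The test drives the Table through the full transaction lifecycle by hand: startShort, startTx, commitTx, canCommit, commit, postTxCommit. A small helper sketch distilling that sequence, assuming the same transactionManager field the test class already uses:

// Hedged helper sketch: wraps a single Put in the start/commit sequence used above.
private void writeInTx(Table table, Put put) throws Exception {
  Transaction tx = transactionManager.startShort(100);
  TransactionAware txAware = (TransactionAware) table;
  txAware.startTx(tx);
  table.put(put);
  txAware.commitTx();
  transactionManager.canCommit(tx.getTransactionId(), txAware.getTxChanges());
  transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
  txAware.postTxCommit();
}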
Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.
The class HiveExploreTableTestRun, method testTableWithDateTimestamp:
@Test
public void testTableWithDateTimestamp() throws Exception {
  TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
  DatasetId dtTsTable = NAMESPACE_ID.dataset("dt_ts_table");
  DatasetId otherDtTsTable = NAMESPACE_ID.dataset("other_dt_ts_table");
  Schema schema = Schema.recordOf("recordWithDateTimestamp",
    Schema.Field.of("int_field", Schema.of(Schema.Type.INT)),
    Schema.Field.of("string_field", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("date_field", Schema.nullableOf(Schema.of(Schema.LogicalType.DATE))),
    Schema.Field.of("ts_millis_field", Schema.nullableOf(Schema.of(Schema.LogicalType.TIMESTAMP_MILLIS))),
    Schema.Field.of("ts_micros_field", Schema.nullableOf(Schema.of(Schema.LogicalType.TIMESTAMP_MICROS))));
  datasetFramework.addInstance(Table.class.getName(), dtTsTable, TableProperties.builder()
    .setSchema(schema)
    .setRowFieldName("int_field")
    .setExploreTableName("dt_ts_table")
    .build());
  datasetFramework.addInstance(Table.class.getName(), otherDtTsTable, TableProperties.builder()
    .setSchema(schema)
    .setRowFieldName("int_field")
    .setExploreTableName("other_dt_ts_table")
    .build());
  try {
    // Accessing dataset instance to perform data operations
    Table table = datasetFramework.getDataset(dtTsTable, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    Transaction tx = transactionManager.startShort(100);
    ((TransactionAware) table).startTx(tx);
    Put put = new Put(Bytes.toBytes("row1"));
    put.add("int_field", 1);
    put.add("string_field", "alice");
    put.add("date_field", 0);
    put.add("ts_millis_field", 1536336590595L);
    put.add("ts_micros_field", 1536336590595123L);
    table.put(put);
    put = new Put(Bytes.toBytes("row2"));
    put.add("int_field", 2);
    put.add("string_field", "bob");
    table.put(put);
    ((TransactionAware) table).commitTx();
    transactionManager.canCommit(tx.getTransactionId(), ((TransactionAware) table).getTxChanges());
    transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
    ((TransactionAware) table).postTxCommit();
    ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from dt_ts_table").get();
    List<Object> columns = results.next().getColumns();
    Assert.assertEquals(5, columns.size());
    Assert.assertEquals("alice", columns.get(1));
    Assert.assertEquals("1970-01-01", columns.get(2));
    Assert.assertEquals("2018-09-07 16:09:50.595", columns.get(3));
    Assert.assertEquals("2018-09-07 16:09:50.595123", columns.get(4));
    columns = results.next().getColumns();
    Assert.assertEquals(5, columns.size());
    Assert.assertEquals("bob", columns.get(1));
    Assert.assertNull(columns.get(2));
    Assert.assertNull(columns.get(3));
    Assert.assertNull(columns.get(4));
    String command = "insert into other_dt_ts_table select int_field, string_field, date_field, ts_millis_field, "
      + "ts_micros_field from dt_ts_table";
    ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
    Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
    command = "select string_field, date_field, ts_millis_field, ts_micros_field from other_dt_ts_table";
    runCommand(NAMESPACE_ID, command, true,
      Lists.newArrayList(new ColumnDesc("string_field", "STRING", 1, null),
                         new ColumnDesc("date_field", "DATE", 2, null),
                         new ColumnDesc("ts_millis_field", "TIMESTAMP", 3, null),
                         new ColumnDesc("ts_micros_field", "TIMESTAMP", 4, null)),
      Lists.newArrayList(new QueryResult(Lists.newArrayList("alice", "1970-01-01", "2018-09-07 16:09:50.595",
                                                            "2018-09-07 16:09:50.595123")),
                         new QueryResult(Lists.newArrayList("bob", null, null, null))));
  } finally {
    datasetFramework.deleteInstance(dtTsTable);
    datasetFramework.deleteInstance(otherDtTsTable);
  }
}
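For reference, a hedged sketch of how the date and timestamp values above are encoded when written to the Table: DATE fields are stored as days since the Unix epoch (so 0 reads back as 1970-01-01), TIMESTAMP_MILLIS as epoch milliseconds, and TIMESTAMP_MICROS as epoch microseconds. The row key and the java.time conversion below are illustrative only.

// Illustrative only: deriving the logical-type column values from a java.time Instant.
Instant instant = Instant.ofEpochMilli(1536336590595L);        // 2018-09-07T16:09:50.595Z
int epochDays = (int) instant.atZone(ZoneOffset.UTC).toLocalDate().toEpochDay();
Put put = new Put(Bytes.toBytes("row3"));                      // hypothetical row key
put.add("int_field", 3);
put.add("date_field", epochDays);                              // DATE: days since epoch
put.add("ts_millis_field", instant.toEpochMilli());            // TIMESTAMP_MILLIS: epoch millis
put.add("ts_micros_field", instant.toEpochMilli() * 1000L);    // TIMESTAMP_MICROS: epoch micros
table.put(put);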
Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.
The class DataPipelineTest, method testOuterJoin:
private void testOuterJoin(Engine engine) throws Exception {
  Schema inputSchema1 = Schema.recordOf("customerRecord",
    Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
  Schema inputSchema2 = Schema.recordOf("itemRecord",
    Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("item_price", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("cust_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING)));
  Schema inputSchema3 = Schema.recordOf("transactionRecord",
    Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("c_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("c_name", Schema.of(Schema.Type.STRING)));
  String input1Name = "source1OuterJoinInput-" + engine;
  String input2Name = "source2OuterJoinInput-" + engine;
  String input3Name = "source3OuterJoinInput-" + engine;
  String outputName = "outerJoinOutput-" + engine;
  String joinerName = "outerJoiner-" + engine;
  String sinkName = "outerJoinSink-" + engine;
  ETLBatchConfig etlConfig = ETLBatchConfig.builder()
    .addStage(new ETLStage("source1", MockSource.getPlugin(input1Name, inputSchema1)))
    .addStage(new ETLStage("source2", MockSource.getPlugin(input2Name, inputSchema2)))
    .addStage(new ETLStage("source3", MockSource.getPlugin(input3Name, inputSchema3)))
    .addStage(new ETLStage("t1", IdentityTransform.getPlugin()))
    .addStage(new ETLStage("t2", IdentityTransform.getPlugin()))
    .addStage(new ETLStage("t3", IdentityTransform.getPlugin()))
    .addStage(new ETLStage(joinerName, MockJoiner.getPlugin("t1.customer_id=t2.cust_id=t3.c_id&"
      + "t1.customer_name=t2.cust_name=t3.c_name", "t1", "")))
    .addStage(new ETLStage(sinkName, MockSink.getPlugin(outputName)))
    .addConnection("source1", "t1")
    .addConnection("source2", "t2")
    .addConnection("source3", "t3")
    .addConnection("t1", joinerName)
    .addConnection("t2", joinerName)
    .addConnection("t3", joinerName)
    .addConnection(joinerName, sinkName)
    .setEngine(engine)
    .build();
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("OuterJoinApp-" + engine);
  ApplicationManager appManager = deployApplication(appId, appRequest);
  Schema outSchema = Schema.recordOf("join.output",
    Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("item_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("item_price", Schema.nullableOf(Schema.of(Schema.Type.LONG))),
    Schema.Field.of("cust_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("cust_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("t_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("c_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("c_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
  StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1)
    .set("customer_id", "1").set("customer_name", "samuel").build();
  StructuredRecord recordBob = StructuredRecord.builder(inputSchema1)
    .set("customer_id", "2").set("customer_name", "bob").build();
  StructuredRecord recordJane = StructuredRecord.builder(inputSchema1)
    .set("customer_id", "3").set("customer_name", "jane").build();
  StructuredRecord recordMartha = StructuredRecord.builder(inputSchema1)
    .set("customer_id", "4").set("customer_name", "martha").build();
  StructuredRecord recordCar = StructuredRecord.builder(inputSchema2)
    .set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").build();
  StructuredRecord recordBike = StructuredRecord.builder(inputSchema2)
    .set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").build();
  StructuredRecord recordTrasCar = StructuredRecord.builder(inputSchema3)
    .set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
  StructuredRecord recordTrasPlane = StructuredRecord.builder(inputSchema3)
    .set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
  StructuredRecord recordTrasBike = StructuredRecord.builder(inputSchema3)
    .set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
  // write the input records to each source
  DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(input1Name));
  MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane, recordMartha));
  inputManager = getDataset(NamespaceId.DEFAULT.dataset(input2Name));
  MockSource.writeInput(inputManager, ImmutableList.of(recordCar, recordBike));
  inputManager = getDataset(NamespaceId.DEFAULT.dataset(input3Name));
  MockSource.writeInput(inputManager, ImmutableList.of(recordTrasCar, recordTrasPlane, recordTrasBike));
  WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  workflowManager.start();
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  StructuredRecord joinRecordSamuel = StructuredRecord.builder(outSchema)
    .set("customer_id", "1").set("customer_name", "samuel")
    .set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel")
    .set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
  StructuredRecord joinRecordBob = StructuredRecord.builder(outSchema)
    .set("customer_id", "2").set("customer_name", "bob")
    .set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
  StructuredRecord joinRecordJane = StructuredRecord.builder(outSchema)
    .set("customer_id", "3").set("customer_name", "jane")
    .set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane")
    .set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
  StructuredRecord joinRecordMartha = StructuredRecord.builder(outSchema)
    .set("customer_id", "4").set("customer_name", "martha").build();
  DataSetManager<Table> sinkManager = getDataset(outputName);
  Set<StructuredRecord> expected = ImmutableSet.of(joinRecordSamuel, joinRecordJane, joinRecordBob, joinRecordMartha);
  Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
  Assert.assertEquals(expected, actual);
  validateMetric(4, appId, joinerName + ".records.out");
  validateMetric(4, appId, sinkName + ".records.in");
}
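The joiner is configured with "t1" as its only required input, so customer records without matching items or transactions (martha) still appear in the output with the other fields left null. The method is parameterized by Engine so the same pipeline can be validated on both batch backends; a hedged sketch of the JUnit wrappers that would typically invoke it (wrapper names are illustrative):

@Test
public void testOuterJoinOnMapReduce() throws Exception {
  // Engine.MAPREDUCE / Engine.SPARK select the execution backend for the pipeline run.
  testOuterJoin(Engine.MAPREDUCE);
}

@Test
public void testOuterJoinOnSpark() throws Exception {
  testOuterJoin(Engine.SPARK);
}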