
Example 1 with Table

Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.

The class IndexedObjectStoreDefinition, method getDataset:

@Override
public IndexedObjectStore<?> getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException {
    // Resolve the embedded "index" Table and "data" ObjectStore declared in this dataset's specification
    Table index = getDataset(datasetContext, "index", spec, arguments, classLoader);
    ObjectStore<?> objectStore = getDataset(datasetContext, "data", spec, arguments, classLoader);
    return new IndexedObjectStore<>(spec.getName(), objectStore, index);
}
Also used: Table(io.cdap.cdap.api.dataset.table.Table)
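
For context, a minimal sketch of the configure() side that usually accompanies a getDataset() like the one above: it declares the embedded "index" Table and "data" ObjectStore in the dataset's specification so they can later be resolved by name. The delegate fields tableDef and objectStoreDef are assumptions for illustration, not taken from the snippet.

// Sketch only: how a composite definition typically wires its embedded datasets
@Override
public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
    return DatasetSpecification.builder(instanceName, getName())
        .properties(properties.getProperties())
        .datasets(tableDef.configure("index", properties),
                  objectStoreDef.configure("data", properties))
        .build();
}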

Example 2 with Table

Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.

The class CoreDatasetsModule, method register:

@Override
public void register(DatasetDefinitionRegistry registry) {
    // reuse the already-registered core "table" definition as the building block for the composite types below
    DatasetDefinition<Table, DatasetAdmin> tableDef = registry.get("table");
    DatasetDefinition<KeyValueTable, DatasetAdmin> kvTableDef = new KeyValueTableDefinition(KeyValueTable.TYPE, tableDef);
    registry.add(kvTableDef);
    registry.add(new KeyValueTableDefinition(KeyValueTable.class.getName(), tableDef));
    DatasetDefinition<ObjectStore, DatasetAdmin> objectStoreDef = new ObjectStoreDefinition(ObjectStore.TYPE, kvTableDef);
    registry.add(objectStoreDef);
    registry.add(new ObjectStoreDefinition(ObjectStore.class.getName(), kvTableDef));
    registry.add(new IndexedObjectStoreDefinition(IndexedObjectStore.TYPE, tableDef, objectStoreDef));
    registry.add(new IndexedObjectStoreDefinition(IndexedObjectStore.class.getName(), tableDef, objectStoreDef));
    registry.add(new IndexedTableDefinition(IndexedTable.TYPE, tableDef));
    registry.add(new IndexedTableDefinition(IndexedTable.class.getName(), tableDef));
    registry.add(new TimeseriesTableDefinition(TimeseriesTable.TYPE, tableDef));
    registry.add(new TimeseriesTableDefinition(TimeseriesTable.class.getName(), tableDef));
    registry.add(new CounterTimeseriesTableDefinition(CounterTimeseriesTable.TYPE, tableDef));
    registry.add(new CounterTimeseriesTableDefinition(CounterTimeseriesTable.class.getName(), tableDef));
    // in-memory table
    registry.add(new InMemoryTableDefinition(InMemoryTable.TYPE));
}
Also used: ObjectStore(io.cdap.cdap.api.dataset.lib.ObjectStore) IndexedObjectStore(io.cdap.cdap.api.dataset.lib.IndexedObjectStore) Table(io.cdap.cdap.api.dataset.table.Table) InMemoryTable(io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryTable) CounterTimeseriesTable(io.cdap.cdap.api.dataset.lib.CounterTimeseriesTable) TimeseriesTable(io.cdap.cdap.api.dataset.lib.TimeseriesTable) IndexedTable(io.cdap.cdap.api.dataset.lib.IndexedTable) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) DatasetAdmin(io.cdap.cdap.api.dataset.DatasetAdmin) InMemoryTableDefinition(io.cdap.cdap.data2.dataset2.lib.table.inmemory.InMemoryTableDefinition) CounterTimeseriesTableDefinition(io.cdap.cdap.api.dataset.lib.CounterTimeseriesTableDefinition) IndexedTableDefinition(io.cdap.cdap.api.dataset.lib.IndexedTableDefinition) IndexedObjectStoreDefinition(io.cdap.cdap.api.dataset.lib.IndexedObjectStoreDefinition) KeyValueTableDefinition(io.cdap.cdap.api.dataset.lib.KeyValueTableDefinition) TimeseriesTableDefinition(io.cdap.cdap.api.dataset.lib.TimeseriesTableDefinition)
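
Once this module is registered, instances of the registered types can be created and resolved through a DatasetFramework, as the Explore tests below do. A minimal sketch, with the variable names datasetFramework and kvId assumed for illustration:

DatasetId kvId = NamespaceId.DEFAULT.dataset("purchases");
datasetFramework.addInstance(KeyValueTable.class.getName(), kvId, DatasetProperties.EMPTY);
// getDataset resolves the instance through the KeyValueTableDefinition registered above
KeyValueTable purchases = datasetFramework.getDataset(kvId, DatasetDefinition.NO_ARGUMENTS, null);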

Example 3 with Table

Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.

The class HiveExploreTableTestRun, method testNonAsciiStrings:

@Test
public void testNonAsciiStrings() throws Exception {
    DatasetId ttId = NAMESPACE_ID.dataset("tt");
    datasetFramework.addInstance(Table.class.getName(), ttId, TableProperties.builder().setSchema(Schema.recordOf("record", Schema.Field.of("a", Schema.of(Schema.Type.STRING)), Schema.Field.of("b", Schema.of(Schema.Type.STRING)))).setRowFieldName("a").setExploreTableName("tt").build());
    try {
        // Accessing dataset instance to perform data operations
        Table tt = datasetFramework.getDataset(ttId, DatasetDefinition.NO_ARGUMENTS, null);
        Assert.assertNotNull(tt);
        Transaction tx = transactionManager.startShort(100);
        ((TransactionAware) tt).startTx(tx);
        tt.put(new Put("a", "b", "c"));
        // row key and column value are non-ASCII
        tt.put(new Put("ä", "b", "ç"));
        ((TransactionAware) tt).commitTx();
        transactionManager.canCommit(tx.getTransactionId(), ((TransactionAware) tt).getTxChanges());
        transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
        ((TransactionAware) tt).postTxCommit();
        ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from tt").get();
        List<Object> columns = results.next().getColumns();
        Assert.assertEquals(2, columns.size());
        Assert.assertEquals("a", columns.get(0));
        Assert.assertEquals("c", columns.get(1));
        columns = results.next().getColumns();
        Assert.assertEquals(2, columns.size());
        Assert.assertEquals("ä", columns.get(0));
        Assert.assertEquals("ç", columns.get(1));
    } finally {
        datasetFramework.deleteInstance(ttId);
    }
}
Also used: Table(io.cdap.cdap.api.dataset.table.Table) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) ExploreExecutionResult(io.cdap.cdap.explore.client.ExploreExecutionResult) Put(io.cdap.cdap.api.dataset.table.Put) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)
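
A minimal follow-up sketch, assuming the standard Table/Row API: the same non-ASCII row could also be read back directly through the Table interface (inside the transaction, before commit) instead of through an Explore query:

// read the row with the non-ASCII key back through the Table API
Row row = tt.get(new Get("ä"));
Assert.assertEquals("ç", row.getString("b"));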

Example 4 with Table

Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.

The class HiveExploreTableTestRun, method testTableWithDateTimestamp:

@Test
public void testTableWithDateTimestamp() throws Exception {
    TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
    DatasetId dtTsTable = NAMESPACE_ID.dataset("dt_ts_table");
    DatasetId otherDtTsTable = NAMESPACE_ID.dataset("other_dt_ts_table");
    Schema schema = Schema.recordOf("recordWithDateTimestamp",
        Schema.Field.of("int_field", Schema.of(Schema.Type.INT)),
        Schema.Field.of("string_field", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("date_field", Schema.nullableOf(Schema.of(Schema.LogicalType.DATE))),
        Schema.Field.of("ts_millis_field", Schema.nullableOf(Schema.of(Schema.LogicalType.TIMESTAMP_MILLIS))),
        Schema.Field.of("ts_micros_field", Schema.nullableOf(Schema.of(Schema.LogicalType.TIMESTAMP_MICROS))));
    datasetFramework.addInstance(Table.class.getName(), dtTsTable, TableProperties.builder().setSchema(schema).setRowFieldName("int_field").setExploreTableName("dt_ts_table").build());
    datasetFramework.addInstance(Table.class.getName(), otherDtTsTable, TableProperties.builder().setSchema(schema).setRowFieldName("int_field").setExploreTableName("other_dt_ts_table").build());
    try {
        // Accessing dataset instance to perform data operations
        Table table = datasetFramework.getDataset(dtTsTable, DatasetDefinition.NO_ARGUMENTS, null);
        Assert.assertNotNull(table);
        Transaction tx = transactionManager.startShort(100);
        ((TransactionAware) table).startTx(tx);
        Put put = new Put(Bytes.toBytes("row1"));
        put.add("int_field", 1);
        put.add("string_field", "alice");
        put.add("date_field", 0);
        put.add("ts_millis_field", 1536336590595L);
        put.add("ts_micros_field", 1536336590595123L);
        table.put(put);
        put = new Put(Bytes.toBytes("row2"));
        put.add("int_field", 2);
        put.add("string_field", "bob");
        table.put(put);
        ((TransactionAware) table).commitTx();
        transactionManager.canCommit(tx.getTransactionId(), ((TransactionAware) table).getTxChanges());
        transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
        ((TransactionAware) table).postTxCommit();
        ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from dt_ts_table").get();
        List<Object> columns = results.next().getColumns();
        Assert.assertEquals(5, columns.size());
        Assert.assertEquals("alice", columns.get(1));
        Assert.assertEquals("1970-01-01", columns.get(2));
        Assert.assertEquals("2018-09-07 16:09:50.595", columns.get(3));
        Assert.assertEquals("2018-09-07 16:09:50.595123", columns.get(4));
        columns = results.next().getColumns();
        Assert.assertEquals(5, columns.size());
        Assert.assertEquals("bob", columns.get(1));
        Assert.assertNull(columns.get(2));
        Assert.assertNull(columns.get(3));
        Assert.assertNull(columns.get(4));
        String command = "insert into other_dt_ts_table select int_field, string_field, date_field, ts_millis_field, " + "ts_micros_field from dt_ts_table";
        ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
        Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
        command = "select string_field, date_field, ts_millis_field, ts_micros_field from other_dt_ts_table";
        runCommand(NAMESPACE_ID, command, true,
            Lists.newArrayList(
                new ColumnDesc("string_field", "STRING", 1, null),
                new ColumnDesc("date_field", "DATE", 2, null),
                new ColumnDesc("ts_millis_field", "TIMESTAMP", 3, null),
                new ColumnDesc("ts_micros_field", "TIMESTAMP", 4, null)),
            Lists.newArrayList(
                new QueryResult(Lists.newArrayList("alice", "1970-01-01", "2018-09-07 16:09:50.595", "2018-09-07 16:09:50.595123")),
                new QueryResult(Lists.newArrayList("bob", null, null, null))));
    } finally {
        datasetFramework.deleteInstance(dtTsTable);
        datasetFramework.deleteInstance(otherDtTsTable);
    }
}
Also used: QueryResult(io.cdap.cdap.proto.QueryResult) Table(io.cdap.cdap.api.dataset.table.Table) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Schema(io.cdap.cdap.api.data.schema.Schema) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) ExploreExecutionResult(io.cdap.cdap.explore.client.ExploreExecutionResult) Put(io.cdap.cdap.api.dataset.table.Put) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)
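
The asserted strings follow from how the logical types are encoded when the Puts are written above: DATE as an int number of days since the epoch, TIMESTAMP_MILLIS and TIMESTAMP_MICROS as long epoch values. A small sketch using plain java.time shows the conversion:

// DATE is written as days since 1970-01-01
LocalDate date = LocalDate.ofEpochDay(0);                          // 1970-01-01
// TIMESTAMP_MILLIS is written as epoch milliseconds
Instant tsMillis = Instant.ofEpochMilli(1536336590595L);           // 2018-09-07T16:09:50.595Z
// TIMESTAMP_MICROS is written as epoch microseconds
long micros = 1536336590595123L;
Instant tsMicros = Instant.ofEpochSecond(micros / 1_000_000L, (micros % 1_000_000L) * 1_000L);
                                                                   // 2018-09-07T16:09:50.595123Z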

Example 5 with Table

Use of io.cdap.cdap.api.dataset.table.Table in project cdap by caskdata.

The class DataPipelineTest, method testOuterJoin:

private void testOuterJoin(Engine engine) throws Exception {
    Schema inputSchema1 = Schema.recordOf("customerRecord", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema2 = Schema.recordOf("itemRecord", Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("item_price", Schema.of(Schema.Type.LONG)), Schema.Field.of("cust_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema3 = Schema.recordOf("transactionRecord", Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("c_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("c_name", Schema.of(Schema.Type.STRING)));
    String input1Name = "source1OuterJoinInput-" + engine;
    String input2Name = "source2OuterJoinInput-" + engine;
    String input3Name = "source3OuterJoinInput-" + engine;
    String outputName = "outerJoinOutput-" + engine;
    String joinerName = "outerJoiner-" + engine;
    String sinkName = "outerJoinSink-" + engine;
    ETLBatchConfig etlConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("source1", MockSource.getPlugin(input1Name, inputSchema1)))
        .addStage(new ETLStage("source2", MockSource.getPlugin(input2Name, inputSchema2)))
        .addStage(new ETLStage("source3", MockSource.getPlugin(input3Name, inputSchema3)))
        .addStage(new ETLStage("t1", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("t2", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("t3", IdentityTransform.getPlugin()))
        .addStage(new ETLStage(joinerName, MockJoiner.getPlugin("t1.customer_id=t2.cust_id=t3.c_id&" + "t1.customer_name=t2.cust_name=t3.c_name", "t1", "")))
        .addStage(new ETLStage(sinkName, MockSink.getPlugin(outputName)))
        .addConnection("source1", "t1")
        .addConnection("source2", "t2")
        .addConnection("source3", "t3")
        .addConnection("t1", joinerName)
        .addConnection("t2", joinerName)
        .addConnection("t3", joinerName)
        .addConnection(joinerName, sinkName)
        .setEngine(engine)
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("OuterJoinApp-" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    Schema outSchema = Schema.recordOf("join.output",
        Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("item_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("item_price", Schema.nullableOf(Schema.of(Schema.Type.LONG))),
        Schema.Field.of("cust_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("cust_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("t_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("c_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("c_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_id", "2").set("customer_name", "bob").build();
    StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
    StructuredRecord recordMartha = StructuredRecord.builder(inputSchema1).set("customer_id", "4").set("customer_name", "martha").build();
    StructuredRecord recordCar = StructuredRecord.builder(inputSchema2).set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").build();
    StructuredRecord recordBike = StructuredRecord.builder(inputSchema2).set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").build();
    StructuredRecord recordTrasCar = StructuredRecord.builder(inputSchema3).set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
    StructuredRecord recordTrasPlane = StructuredRecord.builder(inputSchema3).set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
    StructuredRecord recordTrasBike = StructuredRecord.builder(inputSchema3).set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
    // write one record to each source
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(input1Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane, recordMartha));
    inputManager = getDataset(NamespaceId.DEFAULT.dataset(input2Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordCar, recordBike));
    inputManager = getDataset(NamespaceId.DEFAULT.dataset(input3Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordTrasCar, recordTrasPlane, recordTrasBike));
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    StructuredRecord joinRecordSamuel = StructuredRecord.builder(outSchema).set("customer_id", "1").set("customer_name", "samuel").set("item_id", "11").set("item_price", 10000L).set("cust_id", "1").set("cust_name", "samuel").set("t_id", "1").set("c_id", "1").set("c_name", "samuel").build();
    StructuredRecord joinRecordBob = StructuredRecord.builder(outSchema).set("customer_id", "2").set("customer_name", "bob").set("t_id", "2").set("c_id", "2").set("c_name", "bob").build();
    StructuredRecord joinRecordJane = StructuredRecord.builder(outSchema).set("customer_id", "3").set("customer_name", "jane").set("item_id", "22").set("item_price", 100L).set("cust_id", "3").set("cust_name", "jane").set("t_id", "3").set("c_id", "3").set("c_name", "jane").build();
    StructuredRecord joinRecordMartha = StructuredRecord.builder(outSchema).set("customer_id", "4").set("customer_name", "martha").build();
    DataSetManager<Table> sinkManager = getDataset(outputName);
    Set<StructuredRecord> expected = ImmutableSet.of(joinRecordSamuel, joinRecordJane, joinRecordBob, joinRecordMartha);
    Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
    Assert.assertEquals(expected, actual);
    validateMetric(4, appId, joinerName + ".records.out");
    validateMetric(4, appId, sinkName + ".records.in");
}
Also used: ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId)
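
A possible extra check on the outer-join semantics (a sketch, reusing sinkManager and MockSink from above): customers without a matching item, bob and martha, should still appear in the output with the item fields left null:

List<StructuredRecord> output = MockSink.readOutput(sinkManager);
long withoutItems = output.stream().filter(r -> r.get("item_id") == null).count();
// only bob and martha have no purchased item in the input data
Assert.assertEquals(2, withoutItems);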

Aggregations

Table (io.cdap.cdap.api.dataset.table.Table): 148
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 76
ApplicationId (io.cdap.cdap.proto.id.ApplicationId): 72
Test (org.junit.Test): 72
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 71
Schema (io.cdap.cdap.api.data.schema.Schema): 69
AppRequest (io.cdap.cdap.proto.artifact.AppRequest): 67
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 67
ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig): 63
WorkflowManager (io.cdap.cdap.test.WorkflowManager): 59
KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 53
TransactionAware (org.apache.tephra.TransactionAware): 36
Transaction (org.apache.tephra.Transaction): 30
ArrayList (java.util.ArrayList): 28
DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin): 27
Put (io.cdap.cdap.api.dataset.table.Put): 23
HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 23
HashSet (java.util.HashSet): 21
HashMap (java.util.HashMap): 18
Get (io.cdap.cdap.api.dataset.table.Get): 16