Search in sources :

Example 21 with ExploreExecutionResult

use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.

the class HiveExploreStructuredRecordTestRun method testSelectStar.

/**
 * Runs {@code select *} against {@code MY_TABLE_NAME} and checks the reported schema and the
 * single expected row.
 *
 * @throws Exception if submitting the query or reading its result fails
 */
@Test
public void testSelectStar() throws Exception {
    List<ColumnDesc> expectedSchema = Lists.newArrayList(
        new ColumnDesc(MY_TABLE_NAME + ".id", "STRING", 1, null),
        new ColumnDesc(MY_TABLE_NAME + ".subject", "STRING", 2, null),
        new ColumnDesc(MY_TABLE_NAME + ".body", "STRING", 3, null),
        new ColumnDesc(MY_TABLE_NAME + ".sender", "STRING", 4, null));
    // try-with-resources so the query handle is released even when an assertion below fails
    try (ExploreExecutionResult results =
             exploreClient.submit(NAMESPACE_ID, "select * from " + MY_TABLE_NAME).get()) {
        // check schema
        Assert.assertEquals(expectedSchema, results.getResultSchema());
        List<Object> columns = results.next().getColumns();
        // check results
        Assert.assertEquals("email1", columns.get(0));
        Assert.assertEquals("this is the subject", columns.get(1));
        Assert.assertEquals("this is the body", columns.get(2));
        Assert.assertEquals("sljackson@boss.com", columns.get(3));
        // should not be any more
        Assert.assertFalse(results.hasNext());
    }
}
Also used : ColumnDesc(io.cdap.cdap.proto.ColumnDesc) ExploreExecutionResult(io.cdap.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)

Example 22 with ExploreExecutionResult

use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.

the class HiveExploreTableTestRun method testTableWithDateTimestamp.

/**
 * Writes two rows into a {@link Table} whose schema contains date and timestamp logical types,
 * then checks that they can be selected, copied into a second table with
 * {@code insert into ... select}, and selected again from that second table.
 *
 * @throws Exception if dataset setup, the transaction, or any query fails
 */
@Test
public void testTableWithDateTimestamp() throws Exception {
    // The timestamp strings asserted below are the UTC rendering of the stored epoch values.
    // NOTE(review): the previous default time zone is not restored afterwards - confirm that no
    // other test depends on it.
    TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
    DatasetId dtTsTable = NAMESPACE_ID.dataset("dt_ts_table");
    DatasetId otherDtTsTable = NAMESPACE_ID.dataset("other_dt_ts_table");
    Schema schema = Schema.recordOf("recordWithDateTimestamp",
        Schema.Field.of("int_field", Schema.of(Schema.Type.INT)),
        Schema.Field.of("string_field", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("date_field", Schema.nullableOf(Schema.of(Schema.LogicalType.DATE))),
        Schema.Field.of("ts_millis_field", Schema.nullableOf(Schema.of(Schema.LogicalType.TIMESTAMP_MILLIS))),
        Schema.Field.of("ts_micros_field", Schema.nullableOf(Schema.of(Schema.LogicalType.TIMESTAMP_MICROS))));
    datasetFramework.addInstance(Table.class.getName(), dtTsTable,
        TableProperties.builder().setSchema(schema).setRowFieldName("int_field").setExploreTableName("dt_ts_table").build());
    datasetFramework.addInstance(Table.class.getName(), otherDtTsTable,
        TableProperties.builder().setSchema(schema).setRowFieldName("int_field").setExploreTableName("other_dt_ts_table").build());
    try {
        // Accessing dataset instance to perform data operations
        Table table = datasetFramework.getDataset(dtTsTable, DatasetDefinition.NO_ARGUMENTS, null);
        Assert.assertNotNull(table);
        Transaction tx = transactionManager.startShort(100);
        ((TransactionAware) table).startTx(tx);
        // row1 has every field populated
        Put put = new Put(Bytes.toBytes("row1"));
        put.add("int_field", 1);
        put.add("string_field", "alice");
        put.add("date_field", 0);
        put.add("ts_millis_field", 1536336590595L);
        put.add("ts_micros_field", 1536336590595123L);
        table.put(put);
        // row2 leaves the date/timestamp fields unset so they come back as null
        put = new Put(Bytes.toBytes("row2"));
        put.add("int_field", 2);
        put.add("string_field", "bob");
        table.put(put);
        ((TransactionAware) table).commitTx();
        transactionManager.canCommit(tx.getTransactionId(), ((TransactionAware) table).getTxChanges());
        transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
        ((TransactionAware) table).postTxCommit();
        // try-with-resources so the query handle is released even when an assertion fails
        try (ExploreExecutionResult results =
                 exploreClient.submit(NAMESPACE_ID, "select * from dt_ts_table").get()) {
            List<Object> columns = results.next().getColumns();
            Assert.assertEquals(5, columns.size());
            Assert.assertEquals("alice", columns.get(1));
            Assert.assertEquals("1970-01-01", columns.get(2));
            Assert.assertEquals("2018-09-07 16:09:50.595", columns.get(3));
            Assert.assertEquals("2018-09-07 16:09:50.595123", columns.get(4));
            columns = results.next().getColumns();
            Assert.assertEquals(5, columns.size());
            Assert.assertEquals("bob", columns.get(1));
            Assert.assertNull(columns.get(2));
            Assert.assertNull(columns.get(3));
            Assert.assertNull(columns.get(4));
        }
        String command = "insert into other_dt_ts_table select int_field, string_field, date_field, ts_millis_field, "
            + "ts_micros_field from dt_ts_table";
        try (ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get()) {
            Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
        }
        command = "select string_field, date_field, ts_millis_field, ts_micros_field from other_dt_ts_table";
        runCommand(NAMESPACE_ID, command, true,
            Lists.newArrayList(
                new ColumnDesc("string_field", "STRING", 1, null),
                new ColumnDesc("date_field", "DATE", 2, null),
                new ColumnDesc("ts_millis_field", "TIMESTAMP", 3, null),
                new ColumnDesc("ts_micros_field", "TIMESTAMP", 4, null)),
            Lists.newArrayList(
                new QueryResult(Lists.newArrayList("alice", "1970-01-01", "2018-09-07 16:09:50.595", "2018-09-07 16:09:50.595123")),
                new QueryResult(Lists.newArrayList("bob", null, null, null))));
    } finally {
        datasetFramework.deleteInstance(dtTsTable);
        datasetFramework.deleteInstance(otherDtTsTable);
    }
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) Table(io.cdap.cdap.api.dataset.table.Table) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Schema(io.cdap.cdap.api.data.schema.Schema) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) ExploreExecutionResult(io.cdap.cdap.explore.client.ExploreExecutionResult) Put(io.cdap.cdap.api.dataset.table.Put) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 23 with ExploreExecutionResult

use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.

the class HiveExploreObjectMappedTableTestRun method testSelectStar.

/**
 * Runs {@code select *} against {@code tableToQuery} and checks that the schema and the two
 * returned rows match {@code record1} and {@code record2}.
 *
 * @param tableToQuery table name used in the {@code from} clause of the query
 * @param tableInSchema table name expected as the prefix of every column name in the result schema
 * @throws Exception if submitting the query or reading its result fails
 */
public void testSelectStar(String tableToQuery, String tableInSchema) throws Exception {
    List<ColumnDesc> expectedSchema = Lists.newArrayList(
        new ColumnDesc(tableInSchema + ".row_key", "STRING", 1, null),
        new ColumnDesc(tableInSchema + ".bytearrayfield", "BINARY", 2, null),
        new ColumnDesc(tableInSchema + ".doublefield", "DOUBLE", 3, null),
        new ColumnDesc(tableInSchema + ".floatfield", "FLOAT", 4, null),
        new ColumnDesc(tableInSchema + ".intfield", "INT", 5, null),
        new ColumnDesc(tableInSchema + ".longfield", "BIGINT", 6, null),
        new ColumnDesc(tableInSchema + ".stringfield", "STRING", 7, null));
    // try-with-resources so the query handle is released even when an assertion below fails
    try (ExploreExecutionResult results =
             exploreClient.submit(NAMESPACE_ID, "select * from " + tableToQuery).get()) {
        // check schema
        Assert.assertEquals(expectedSchema, results.getResultSchema());
        List<Object> columns = results.next().getColumns();
        // check record1
        Assert.assertEquals("123", columns.get(0));
        Assert.assertArrayEquals(record1.byteArrayField, (byte[]) columns.get(1));
        Assert.assertTrue(Math.abs(record1.doubleField - (Double) columns.get(2)) < 0.000001);
        // the float column comes back as a Double, hence the Double cast and tolerance compare
        Assert.assertTrue(Math.abs(record1.floatField - (Double) columns.get(3)) < 0.000001);
        Assert.assertEquals(record1.intField, columns.get(4));
        Assert.assertEquals(record1.longField, columns.get(5));
        Assert.assertEquals(record1.stringField, columns.get(6));
        // check record2
        columns = results.next().getColumns();
        Assert.assertEquals("456", columns.get(0));
        Assert.assertArrayEquals(record2.byteArrayField, (byte[]) columns.get(1));
        Assert.assertTrue(Math.abs(record2.doubleField - (Double) columns.get(2)) < 0.000001);
        Assert.assertTrue(Math.abs(record2.floatField - (Double) columns.get(3)) < 0.000001);
        Assert.assertEquals(record2.intField, columns.get(4));
        Assert.assertEquals(record2.longField, columns.get(5));
        Assert.assertEquals(record2.stringField, columns.get(6));
        // should not be any more
        Assert.assertFalse(results.hasNext());
    }
}
Also used : ColumnDesc(io.cdap.cdap.proto.ColumnDesc) ExploreExecutionResult(io.cdap.cdap.explore.client.ExploreExecutionResult)

Example 24 with ExploreExecutionResult

use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.

the class HiveExploreServiceFileSetTestRun method testOrcFileset.

/**
 * Creates a file set configured with the ORC SerDe and input/output formats, checks that a Hive
 * table shows up for it, inserts two rows through Hive, reads them back, and finally checks the
 * table listing again after the dataset has been deleted.
 *
 * @throws Exception if dataset setup or any query fails
 */
@Test
public void testOrcFileset() throws Exception {
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("orcfiles");
    final String tableName = getDatasetHiveName(datasetInstanceId);
    // create a file set that is explorable as an ORC-backed Hive table
    datasetFramework.addInstance("fileSet", datasetInstanceId, FileSetProperties.builder()
        .setEnableExploreOnCreate(true)
        .setSerDe("org.apache.hadoop.hive.ql.io.orc.OrcSerde")
        .setExploreInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")
        .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")
        .setExploreSchema("id int, name string")
        .build());
    try {
        // verify that the hive table was created for this file set
        runCommand(NAMESPACE_ID, "show tables", true,
            Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
        // insert data into the table
        ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID,
            String.format("insert into table %s values (1, 'samuel'), (2, 'dwayne')", tableName)).get();
        result.close();
        // verify that we can query the key-values in the file with Hive
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true,
            Lists.newArrayList(
                new ColumnDesc(tableName + ".id", "INT", 1, null),
                new ColumnDesc(tableName + ".name", "STRING", 2, null)),
            Lists.newArrayList(
                new QueryResult(Lists.<Object>newArrayList(1, "samuel")),
                new QueryResult(Lists.<Object>newArrayList(2, "dwayne"))));
    } finally {
        // drop the dataset; done in finally so a failed assertion above does not leave it behind
        datasetFramework.deleteInstance(datasetInstanceId);
    }
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, "show tables", false,
        Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
        Collections.<QueryResult>emptyList());
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) ExploreExecutionResult(io.cdap.cdap.explore.client.ExploreExecutionResult) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 25 with ExploreExecutionResult

use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.

the class WritableDatasetTestRun method assertSelectAll.

/**
 * Runs {@code select *} on {@code table} and asserts that the rows come back in exactly the
 * order and with exactly the column values given in {@code expectedResults}, with no extra rows.
 *
 * @param namespace namespace in which the query is submitted
 * @param table name of the table to select from
 * @param expectedResults expected column values, one inner list per row, in result order
 * @throws Exception if submitting the query or reading its result fails
 */
private void assertSelectAll(NamespaceId namespace, String table, List<List<Object>> expectedResults) throws Exception {
    // try-with-resources so the query handle is closed even when an assertion fails midway
    try (ExploreExecutionResult result = exploreClient.submit(namespace, "select * from " + table).get()) {
        for (List<Object> expectedResult : expectedResults) {
            Assert.assertEquals(expectedResult, result.next().getColumns());
        }
        Assert.assertFalse(result.hasNext());
    }
}
Also used : ExploreExecutionResult(io.cdap.cdap.explore.client.ExploreExecutionResult)

Aggregations

ExploreExecutionResult (io.cdap.cdap.explore.client.ExploreExecutionResult): 52, Test (org.junit.Test): 36, ColumnDesc (io.cdap.cdap.proto.ColumnDesc): 24, DatasetId (io.cdap.cdap.proto.id.DatasetId): 20, QueryResult (io.cdap.cdap.proto.QueryResult): 18, Transaction (org.apache.tephra.Transaction): 14, Table (io.cdap.cdap.api.dataset.table.Table): 10, Schema (io.cdap.cdap.api.data.schema.Schema): 6, Put (io.cdap.cdap.api.dataset.table.Put): 6, NamespaceId (io.cdap.cdap.proto.id.NamespaceId): 6, TransactionAware (org.apache.tephra.TransactionAware): 6, ObjectMappedTable (io.cdap.cdap.api.dataset.lib.ObjectMappedTable): 4, QueryClient (io.cdap.cdap.client.QueryClient): 4, KeyExtendedStructValueTableDefinition (io.cdap.cdap.explore.service.datasets.KeyExtendedStructValueTableDefinition): 4, KeyStructValueTableDefinition (io.cdap.cdap.explore.service.datasets.KeyStructValueTableDefinition): 4, WritableKeyStructValueTableDefinition (io.cdap.cdap.explore.service.datasets.WritableKeyStructValueTableDefinition): 4, NamespaceMeta (io.cdap.cdap.proto.NamespaceMeta): 4, QueryStatus (io.cdap.cdap.proto.QueryStatus): 4, TxRunnable (io.cdap.cdap.api.TxRunnable): 2, DatasetContext (io.cdap.cdap.api.data.DatasetContext): 2