Example 6 with ExploreExecutionResult

Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

From the class GenerateClientUsageExample, method queryClient:

public void queryClient() throws Exception {
    // Construct the client used to interact with CDAP
    QueryClient queryClient = new QueryClient(clientConfig);
    // Perform an ad-hoc query using the Purchase example
    ListenableFuture<ExploreExecutionResult> resultFuture = queryClient.execute(NamespaceId.DEFAULT, "SELECT * FROM dataset_history WHERE customer IN ('Alice','Bob')");
    ExploreExecutionResult results = resultFuture.get();
    // Fetch schema
    List<ColumnDesc> schema = results.getResultSchema();
    String[] header = new String[schema.size()];
    for (int i = 0; i < header.length; i++) {
        ColumnDesc column = schema.get(i);
        // Hive columns start at 1
        int index = column.getPosition() - 1;
        header[index] = column.getName() + ": " + column.getType();
    }
}
Also used: ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) QueryClient(co.cask.cdap.client.QueryClient)
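
The snippet above builds the header but never walks the rows. ExploreExecutionResult is also an iterator over QueryResult rows (as the tests below show), so a minimal continuation of queryClient(), assuming the same results handle, might look like:

// Hypothetical continuation: print each row beneath the header built above.
System.out.println(Arrays.toString(header));
while (results.hasNext()) {
    // Each QueryResult exposes its cells as a List<Object>, one entry per column.
    List<Object> columns = results.next().getColumns();
    System.out.println(columns);
}
// Close the result so the Explore service can release the query's resources.
results.close();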

Example 7 with ExploreExecutionResult

Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

From the class HiveExploreServiceStreamTest, method testAvroFormattedStream:

@Test
public void testAvroFormattedStream() throws Exception {
    StreamId streamId = NAMESPACE_ID.stream("avroStream");
    createStream(streamId);
    try {
        Schema schema = Schema.recordOf("purchase", Schema.Field.of("user", Schema.of(Schema.Type.STRING)), Schema.Field.of("num", Schema.of(Schema.Type.INT)), Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)));
        FormatSpecification formatSpecification = new FormatSpecification(Formats.AVRO, schema, Collections.<String, String>emptyMap());
        StreamProperties properties = new StreamProperties(Long.MAX_VALUE, formatSpecification, 1000);
        setStreamProperties(NAMESPACE_ID.getNamespace(), "avroStream", properties);
        // CDAP schemas serialize to Avro-compatible JSON, so the Avro parser can read them directly
        org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(schema.toString());
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 5, 3.14));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 10, 2.34));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userY", 1, 1.23));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 50, 45.67));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 100, 98.76));
        Double xPrice = 5 * 3.14 + 10 * 2.34;
        Double yPrice = 1.23;
        Double zPrice = 50 * 45.67 + 100 * 98.76;
        ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, "SELECT `user`, sum(num) as total_num, sum(price * num) as total_price " + "FROM " + getTableName(streamId) + " GROUP BY `user` ORDER BY total_price DESC").get();
        Assert.assertTrue(result.hasNext());
        Assert.assertEquals(Lists.newArrayList(new ColumnDesc("user", "STRING", 1, null), new ColumnDesc("total_num", "BIGINT", 2, null), new ColumnDesc("total_price", "DOUBLE", 3, null)), result.getResultSchema());
        // should get 3 rows
        // first row should be for userZ
        List<Object> rowColumns = result.next().getColumns();
        // call toString() because Avro returns Utf8 objects for string columns
        Assert.assertEquals("userZ", rowColumns.get(0).toString());
        Assert.assertEquals(150L, rowColumns.get(1));
        Assert.assertTrue(Math.abs(zPrice - (Double) rowColumns.get(2)) < 0.0000001);
        // 2nd row, should be userX
        rowColumns = result.next().getColumns();
        Assert.assertEquals("userX", rowColumns.get(0).toString());
        Assert.assertEquals(15L, rowColumns.get(1));
        Assert.assertTrue(Math.abs(xPrice - (Double) rowColumns.get(2)) < 0.0000001);
        // 3rd row, should be userY
        rowColumns = result.next().getColumns();
        Assert.assertEquals("userY", rowColumns.get(0).toString());
        Assert.assertEquals(1L, rowColumns.get(1));
        Assert.assertTrue(Math.abs(yPrice - (Double) rowColumns.get(2)) < 0.0000001);
        // shouldn't be any more results
        Assert.assertFalse(result.hasNext());
    } finally {
        dropStream(streamId);
    }
}
Also used: StreamId(co.cask.cdap.proto.id.StreamId) Schema(co.cask.cdap.api.data.schema.Schema) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StreamProperties(co.cask.cdap.proto.StreamProperties) ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)
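
The createAvroEvent helper is not reproduced on this page. A plausible sketch, assuming it serializes one purchase record with Avro's binary encoding (GenericRecordBuilder and EncoderFactory from org.apache.avro) and that sendStreamEvent accepts the resulting byte[]:

// Hypothetical helper: build a purchase record and serialize it to Avro binary bytes.
private byte[] createAvroEvent(org.apache.avro.Schema schema, String user, int num, double price) throws IOException {
    GenericRecord record = new GenericRecordBuilder(schema)
        .set("user", user)
        .set("num", num)
        .set("price", price)
        .build();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // The binary encoder writes the record without a schema header;
    // the stream's FormatSpecification supplies the schema at read time.
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
    encoder.flush();
    return out.toByteArray();
}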

Example 8 with ExploreExecutionResult

Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

From the class HiveExploreServiceStreamTest, method testSelectStarOnStream:

private void testSelectStarOnStream() throws Exception {
    ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from " + streamTableName).get();
    // check schema
    List<ColumnDesc> expectedSchema = Lists.newArrayList(new ColumnDesc(streamTableName + ".ts", "BIGINT", 1, null), new ColumnDesc(streamTableName + ".headers", "map<string,string>", 2, null), new ColumnDesc(streamTableName + ".body", "STRING", 3, null));
    Assert.assertEquals(expectedSchema, results.getResultSchema());
    // check each result, skipping the timestamp column since it changes on every test run
    // first result
    List<Object> columns = results.next().getColumns();
    // map columns are returned as JSON strings, so parse them before comparing
    Assert.assertEquals(headers, GSON.fromJson((String) columns.get(1), headerType));
    Assert.assertEquals(body1, columns.get(2));
    // second result
    columns = results.next().getColumns();
    Assert.assertEquals(headers, GSON.fromJson((String) columns.get(1), headerType));
    Assert.assertEquals(body2, columns.get(2));
    // third result
    columns = results.next().getColumns();
    Assert.assertEquals(headers, GSON.fromJson((String) columns.get(1), headerType));
    Assert.assertEquals(body3, columns.get(2));
    // there should not be any more results
    Assert.assertFalse(results.hasNext());
}
Also used: ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult)
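
GSON, headerType, headers and the body values are fixtures defined elsewhere in the test class. Because Hive returns map columns as JSON strings, the test needs a Gson instance and a Type token to parse them back into a Map; a minimal sketch of those two declarations, under that assumption:

// Hypothetical fixture declarations matching how the headers column is deserialized above.
private static final Gson GSON = new Gson();
private static final Type headerType = new TypeToken<Map<String, String>>() { }.getType();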

Example 9 with ExploreExecutionResult

Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

From the class HiveExploreServiceTestRun, method testQueriesCount:

@Test
public void testQueriesCount() throws Exception {
    NamespaceId testNamespace1 = new NamespaceId("testQueriesCount");
    NamespaceMeta namespaceMeta = new NamespaceMeta.Builder().setName(testNamespace1).build();
    namespaceAdmin.create(namespaceMeta);
    exploreClient.addNamespace(namespaceMeta).get();
    try {
        Assert.assertEquals(0, exploreService.getActiveQueryCount(testNamespace1));
        ListenableFuture<ExploreExecutionResult> future = exploreClient.submit(testNamespace1, "show tables");
        ExploreExecutionResult result = null;
        try {
            result = future.get();
            Assert.assertEquals(1, exploreService.getActiveQueryCount(testNamespace1));
        } finally {
            if (result != null) {
                result.close();
            }
            Assert.assertEquals(0, exploreService.getActiveQueryCount(testNamespace1));
        }
    } finally {
        exploreClient.removeNamespace(testNamespace1).get();
    }
}
Also used: NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) NamespaceId(co.cask.cdap.proto.id.NamespaceId) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)
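
Since it is close() that lets the active query count drop back to zero, the same check can be written with try-with-resources (a sketch, assuming ExploreExecutionResult implements AutoCloseable, which its close() call above suggests):

// Equivalent check: close() runs automatically when the try block exits.
try (ExploreExecutionResult result = exploreClient.submit(testNamespace1, "show tables").get()) {
    Assert.assertEquals(1, exploreService.getActiveQueryCount(testNamespace1));
}
// After the implicit close, no query should be counted as active.
Assert.assertEquals(0, exploreService.getActiveQueryCount(testNamespace1));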

Example 10 with ExploreExecutionResult

Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

From the class HiveExploreServiceFileSetTestRun, method testOrcFileset:

@Test
public void testOrcFileset() throws Exception {
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("orcfiles");
    final String tableName = getDatasetHiveName(datasetInstanceId);
    // create a file set dataset configured to be explored as an ORC table
    datasetFramework.addInstance("fileSet", datasetInstanceId, FileSetProperties.builder().setEnableExploreOnCreate(true).setSerDe("org.apache.hadoop.hive.ql.io.orc.OrcSerde").setExploreInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat").setExploreOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat").setExploreSchema("id int, name string").build());
    // verify that the Hive table was created for this file set
    runCommand(NAMESPACE_ID, "show tables", true, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
    // insert data into the table
    ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, String.format("insert into table %s values (1, 'samuel'), (2, 'dwayne')", tableName)).get();
    result.close();
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, Lists.newArrayList(new ColumnDesc(tableName + ".id", "INT", 1, null), new ColumnDesc(tableName + ".name", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(1, "samuel")), new QueryResult(Lists.<Object>newArrayList(2, "dwayne"))));
    // drop the dataset
    datasetFramework.deleteInstance(datasetInstanceId);
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, "show tables", false, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Collections.<QueryResult>emptyList());
}
Also used: QueryResult(co.cask.cdap.proto.QueryResult) ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)
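
The runCommand utility used throughout these tests is not shown on this page. A plausible shape, assuming it submits the statement and compares the schema and the collected rows against the expectations:

// Hypothetical sketch of the runCommand test helper.
private void runCommand(NamespaceId namespace, String command, boolean expectResults,
                        List<ColumnDesc> expectedSchema, List<QueryResult> expectedResults) throws Exception {
    ExploreExecutionResult results = exploreClient.submit(namespace, command).get();
    Assert.assertEquals(expectResults, results.hasNext());
    Assert.assertEquals(expectedSchema, results.getResultSchema());
    // ExploreExecutionResult iterates QueryResult rows, so drain it into a list to compare.
    Assert.assertEquals(expectedResults, Lists.newArrayList(results));
    results.close();
}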

Aggregations

ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult): 25 usages
Test (org.junit.Test): 16 usages
ColumnDesc (co.cask.cdap.proto.ColumnDesc): 13 usages
QueryResult (co.cask.cdap.proto.QueryResult): 9 usages
DatasetId (co.cask.cdap.proto.id.DatasetId): 7 usages
Transaction (org.apache.tephra.Transaction): 5 usages
Schema (co.cask.cdap.api.data.schema.Schema): 4 usages
Table (co.cask.cdap.api.dataset.table.Table): 3 usages
NamespaceId (co.cask.cdap.proto.id.NamespaceId): 3 usages
StreamId (co.cask.cdap.proto.id.StreamId): 3 usages
FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 2 usages
KeyExtendedStructValueTableDefinition (co.cask.cdap.explore.service.datasets.KeyExtendedStructValueTableDefinition): 2 usages
KeyStructValueTableDefinition (co.cask.cdap.explore.service.datasets.KeyStructValueTableDefinition): 2 usages
WritableKeyStructValueTableDefinition (co.cask.cdap.explore.service.datasets.WritableKeyStructValueTableDefinition): 2 usages
NamespaceMeta (co.cask.cdap.proto.NamespaceMeta): 2 usages
QueryStatus (co.cask.cdap.proto.QueryStatus): 2 usages
StreamProperties (co.cask.cdap.proto.StreamProperties): 2 usages
Put (co.cask.cdap.api.dataset.table.Put): 1 usage
RowMaker (co.cask.cdap.cli.util.RowMaker): 1 usage
Table (co.cask.cdap.cli.util.table.Table): 1 usage