Search in sources:

Example 11 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class HiveExploreServiceStreamTest method testSelectStarOnStream.

/**
 * Submits {@code select *} against the stream table, then verifies the result schema and
 * the three returned rows. Only the headers and body columns are compared; the timestamp
 * column is skipped because it changes for each test run.
 */
private void testSelectStarOnStream() throws Exception {
    String query = "select * from " + streamTableName;
    ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, query).get();

    // The schema exposes the three stream columns, each qualified by the table name.
    List<ColumnDesc> expectedSchema = Lists.newArrayList(
        new ColumnDesc(streamTableName + ".ts", "BIGINT", 1, null),
        new ColumnDesc(streamTableName + ".headers", "map<string,string>", 2, null),
        new ColumnDesc(streamTableName + ".body", "STRING", 3, null));
    Assert.assertEquals(expectedSchema, results.getResultSchema());

    // Rows are expected in this order; every row carries the same headers.
    Object[] expectedBodies = { body1, body2, body3 };
    for (Object expectedBody : expectedBodies) {
        List<Object> row = results.next().getColumns();
        // maps are returned as json objects, so decode column 1 before comparing
        Assert.assertEquals(headers, GSON.fromJson((String) row.get(1), headerType));
        Assert.assertEquals(expectedBody, row.get(2));
    }

    // nothing may follow the third row
    Assert.assertFalse(results.hasNext());
}
Also used : ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult)

Example 12 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class HiveExploreServiceStreamTest method testJoinOnStreams.

/**
 * Creates two streams, sends two events to each, and checks that joining them on the body
 * column yields exactly the one row whose body ("ABC") is present in both streams.
 * Each stream is dropped in its own {@code finally} block.
 */
@Test
public void testJoinOnStreams() throws Exception {
    StreamId streamId1 = NAMESPACE_ID.stream("jointest1");
    StreamId streamId2 = NAMESPACE_ID.stream("jointest2");
    grantAndAssertSuccess(streamId1, USER, EnumSet.allOf(Action.class));
    grantAndAssertSuccess(streamId2, USER, EnumSet.allOf(Action.class));
    createStream(streamId1);
    try {
        createStream(streamId2);
        try {
            // "ABC" is the only body sent to both streams.
            sendStreamEvent(streamId1, Collections.<String, String>emptyMap(), Bytes.toBytes("ABC"));
            sendStreamEvent(streamId1, Collections.<String, String>emptyMap(), Bytes.toBytes("XYZ"));
            sendStreamEvent(streamId2, Collections.<String, String>emptyMap(), Bytes.toBytes("ABC"));
            sendStreamEvent(streamId2, Collections.<String, String>emptyMap(), Bytes.toBytes("DEF"));

            // NOTE(review): assumes getTableName is a pure name lookup, so resolving it once
            // per stream is equivalent to calling it inline each time -- confirm.
            String leftTable = getTableName(streamId1);
            String rightTable = getTableName(streamId2);
            String leftBody = leftTable + ".body";
            String rightBody = rightTable + ".body";

            String joinQuery = "select " + leftBody + ", " + rightBody
                + " from " + leftTable + " join " + rightTable
                + " on (" + leftBody + " = " + rightBody + ")";

            runCommand(
                NAMESPACE_ID,
                joinQuery,
                true,
                Lists.newArrayList(
                    new ColumnDesc(leftBody, "STRING", 1, null),
                    new ColumnDesc(rightBody, "STRING", 2, null)),
                Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("ABC", "ABC"))));
        } finally {
            dropStream(streamId2);
        }
    } finally {
        dropStream(streamId1);
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Action(co.cask.cdap.proto.security.Action) QueryResult(co.cask.cdap.proto.QueryResult) ColumnDesc(co.cask.cdap.proto.ColumnDesc) Test(org.junit.Test)

Example 13 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class HiveExploreServiceStreamTest method testAvroFormattedStream.

/**
 * Creates a stream configured with the Avro format, sends five purchase events for three
 * users, and checks that a grouped aggregate query over the stream returns the expected
 * schema and the three per-user rows ordered by descending total price.
 * The stream is dropped in a {@code finally} block.
 */
@Test
public void testAvroFormattedStream() throws Exception {
    StreamId streamId = NAMESPACE_ID.stream("avroStream");
    grantAndAssertSuccess(streamId, USER, EnumSet.allOf(Action.class));
    createStream(streamId);
    try {
        Schema schema = Schema.recordOf("purchase", Schema.Field.of("user", Schema.of(Schema.Type.STRING)), Schema.Field.of("num", Schema.of(Schema.Type.INT)), Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)));
        FormatSpecification formatSpecification = new FormatSpecification(Formats.AVRO, schema, Collections.<String, String>emptyMap());
        StreamProperties properties = new StreamProperties(Long.MAX_VALUE, formatSpecification, 1000);
        setStreamProperties(NAMESPACE_ID.getNamespace(), "avroStream", properties);
        // our schemas are compatible
        org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(schema.toString());
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 5, 3.14));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 10, 2.34));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userY", 1, 1.23));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 50, 45.67));
        sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 100, 98.76));
        // Expected sum(price * num) per user; primitives avoid needless boxing.
        double xPrice = 5 * 3.14 + 10 * 2.34;
        double yPrice = 1.23;
        double zPrice = 50 * 45.67 + 100 * 98.76;
        // Tolerance for comparing the summed doubles returned by the query.
        double delta = 0.0000001;
        ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, "SELECT `user`, sum(num) as total_num, sum(price * num) as total_price " + "FROM " + getTableName(streamId) + " GROUP BY `user` ORDER BY total_price DESC").get();
        Assert.assertTrue(result.hasNext());
        Assert.assertEquals(Lists.newArrayList(new ColumnDesc("user", "STRING", 1, null), new ColumnDesc("total_num", "BIGINT", 2, null), new ColumnDesc("total_price", "DOUBLE", 3, null)), result.getResultSchema());
        // should get 3 rows
        // first row should be for userZ
        List<Object> rowColumns = result.next().getColumns();
        // toString b/c avro returns a utf8 object for strings
        Assert.assertEquals("userZ", rowColumns.get(0).toString());
        Assert.assertEquals(150L, rowColumns.get(1));
        // assertEquals with a delta reports expected/actual values when the check fails
        Assert.assertEquals(zPrice, ((Double) rowColumns.get(2)).doubleValue(), delta);
        // 2nd row, should be userX
        rowColumns = result.next().getColumns();
        Assert.assertEquals("userX", rowColumns.get(0).toString());
        Assert.assertEquals(15L, rowColumns.get(1));
        Assert.assertEquals(xPrice, ((Double) rowColumns.get(2)).doubleValue(), delta);
        // 3rd row, should be userY
        rowColumns = result.next().getColumns();
        Assert.assertEquals("userY", rowColumns.get(0).toString());
        Assert.assertEquals(1L, rowColumns.get(1));
        Assert.assertEquals(yPrice, ((Double) rowColumns.get(2)).doubleValue(), delta);
        // shouldn't be any more results
        Assert.assertFalse(result.hasNext());
    } finally {
        dropStream(streamId);
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Action(co.cask.cdap.proto.security.Action) Schema(co.cask.cdap.api.data.schema.Schema) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StreamProperties(co.cask.cdap.proto.StreamProperties) ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)

Example 14 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class HiveExploreServiceTimeoutTest method testTimeoutFetchAllResults.

/**
 * Executes a select query, fetches every result, and verifies that the transactions opened
 * for the query are no longer in progress, that status and schema can still be read through
 * the handle, and that the handle is gone once it has been closed and the inactive-operation
 * timeout has elapsed.
 */
@Test
public void testTimeoutFetchAllResults() throws Exception {
    // Transactions already in progress before the query starts.
    Set<Long> txnsBeforeQuery = transactionManager.getCurrentState().getInProgress().keySet();
    QueryHandle handle = exploreService.execute(NAMESPACE_ID, "select key, value from " + MY_TABLE_NAME);
    // Whatever is in progress now but was not before is attributed to the query.
    Set<Long> txnsOfQuery =
        Sets.difference(transactionManager.getCurrentState().getInProgress().keySet(), txnsBeforeQuery);
    Assert.assertFalse(txnsOfQuery.isEmpty());

    QueryStatus finalStatus = waitForCompletionStatus(handle, 200, TimeUnit.MILLISECONDS, 20);
    Assert.assertEquals(QueryStatus.OpStatus.FINISHED, finalStatus.getStatus());
    Assert.assertTrue(finalStatus.hasResults());
    List<ColumnDesc> resultSchema = exploreService.getResultSchema(handle);

    // Keep fetching batches until an empty batch comes back.
    boolean drained = false;
    while (!drained) {
        drained = exploreService.nextResults(handle, 100).isEmpty();
    }

    // Sleep for some time for txn to get closed
    TimeUnit.SECONDS.sleep(1);

    // None of the query's transactions may still be in progress.
    Set<Long> stillInProgress =
        Sets.intersection(txnsOfQuery, transactionManager.getCurrentState().getInProgress().keySet())
            .immutableCopy();
    Assert.assertEquals(ImmutableSet.<Long>of(), stillInProgress);

    // Calls using the inactive handle must still work.
    Assert.assertEquals(finalStatus, exploreService.getStatus(handle));
    Assert.assertEquals(resultSchema, exploreService.getResultSchema(handle));
    exploreService.close(handle);

    // Sleep for timeout to happen
    TimeUnit.SECONDS.sleep(INACTIVE_OPERATION_TIMEOUT_SECS + 3);
    try {
        exploreService.getStatus(handle);
        Assert.fail("Should throw HandleNotFoundException due to operation cleanup");
    } catch (HandleNotFoundException expected) {
        // Expected exception due to timeout
    }
}
Also used : QueryHandle(co.cask.cdap.proto.QueryHandle) ColumnDesc(co.cask.cdap.proto.ColumnDesc) QueryStatus(co.cask.cdap.proto.QueryStatus) Test(org.junit.Test)

Example 15 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class HiveExploreServiceTimeoutTest method testTimeoutNoResults.

/**
 * Executes a statement that produces no results and verifies that the transactions opened
 * for it are no longer in progress, that status and schema can still be read through the
 * handle, and that the handle is gone once it has been closed and the inactive-operation
 * timeout has elapsed.
 */
@Test
public void testTimeoutNoResults() throws Exception {
    // Transactions already in progress before the statement starts.
    Set<Long> txnsBeforeQuery = transactionManager.getCurrentState().getInProgress().keySet();
    QueryHandle handle = exploreService.execute(NAMESPACE_ID, "drop table if exists not_existing_table_name");
    // Whatever is in progress now but was not before is attributed to the statement.
    Set<Long> txnsOfQuery =
        Sets.difference(transactionManager.getCurrentState().getInProgress().keySet(), txnsBeforeQuery);
    Assert.assertFalse(txnsOfQuery.isEmpty());

    QueryStatus finalStatus = waitForCompletionStatus(handle, 200, TimeUnit.MILLISECONDS, 20);
    Assert.assertEquals(QueryStatus.OpStatus.FINISHED, finalStatus.getStatus());
    Assert.assertFalse(finalStatus.hasResults());
    List<ColumnDesc> resultSchema = exploreService.getResultSchema(handle);

    // Sleep for some time for txn to get closed
    TimeUnit.SECONDS.sleep(1);

    // None of the statement's transactions may still be in progress.
    Set<Long> stillInProgress =
        Sets.intersection(txnsOfQuery, transactionManager.getCurrentState().getInProgress().keySet())
            .immutableCopy();
    Assert.assertEquals(ImmutableSet.<Long>of(), stillInProgress);

    // Calls using the inactive handle must still work.
    Assert.assertEquals(finalStatus, exploreService.getStatus(handle));
    Assert.assertEquals(resultSchema, exploreService.getResultSchema(handle));
    exploreService.close(handle);

    // Sleep for timeout to happen
    TimeUnit.SECONDS.sleep(INACTIVE_OPERATION_TIMEOUT_SECS + 3);
    try {
        exploreService.getStatus(handle);
        Assert.fail("Should throw HandleNotFoundException due to operation cleanup");
    } catch (HandleNotFoundException expected) {
        // Expected exception due to timeout
    }
}
Also used : QueryHandle(co.cask.cdap.proto.QueryHandle) ColumnDesc(co.cask.cdap.proto.ColumnDesc) QueryStatus(co.cask.cdap.proto.QueryStatus) Test(org.junit.Test)

Aggregations

ColumnDesc (co.cask.cdap.proto.ColumnDesc): 38, QueryResult (co.cask.cdap.proto.QueryResult): 23, Test (org.junit.Test): 21, DatasetId (co.cask.cdap.proto.id.DatasetId): 14, ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult): 13, TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 8, PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 7, SQLException (java.sql.SQLException): 7, Location (org.apache.twill.filesystem.Location): 7, FileSet (co.cask.cdap.api.dataset.lib.FileSet): 6, Schema (co.cask.cdap.api.data.schema.Schema): 4, ExploreClient (co.cask.cdap.explore.client.ExploreClient): 4, MockExploreClient (co.cask.cdap.explore.client.MockExploreClient): 4, QueryStatus (co.cask.cdap.proto.QueryStatus): 4, StreamId (co.cask.cdap.proto.id.StreamId): 4, ResultSet (java.sql.ResultSet): 4, PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 3, PartitionedFileSetProperties (co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties): 3, Table (co.cask.cdap.api.dataset.table.Table): 3, QueryHandle (co.cask.cdap.proto.QueryHandle): 3