Search in sources :

Example 31 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedTextFile.

// this tests mainly the support for different text formats. Other features (partitioning etc.) are tested above.
private void testPartitionedTextFile(String name, String format, String delim, String fileDelim) throws Exception {
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset(name);
    final String tableName = getDatasetHiveName(datasetInstanceId);
    // create a time partitioned file set
    PartitionedFileSetProperties.Builder builder = (PartitionedFileSetProperties.Builder) PartitionedFileSetProperties.builder().setPartitioning(Partitioning.builder().addIntField("number").build()).setBasePath(name).setEnableExploreOnCreate(true).setExploreSchema("key STRING, value INT").setExploreFormat(format);
    if (delim != null) {
        builder.setExploreFormatProperty("delimiter", delim);
    }
    datasetFramework.addInstance("partitionedFileSet", datasetInstanceId, builder.build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, "show tables", true, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
    // Accessing dataset instance to perform data operations
    PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    FileSet fileSet = partitioned.getEmbeddedFileSet();
    // add a partitions. Beware that Hive expects a partition to be a directory, so we create a dir with one file
    Location location1 = fileSet.getLocation("file1/nn");
    FileWriterHelper.generateTextFile(location1.getOutputStream(), fileDelim, "x", 1, 2);
    PartitionKey key1 = PartitionKey.builder().addIntField("number", 1).build();
    addPartition(partitioned, key1, "file1");
    // verify that the partitions were added to Hive
    runCommand(NAMESPACE_ID, "show partitions " + tableName, true, Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("number=1"))));
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName + " ORDER BY key", true, Lists.newArrayList(new ColumnDesc(tableName + ".key", "STRING", 1, null), new ColumnDesc(tableName + ".value", "INT", 2, null), new ColumnDesc(tableName + ".number", "INT", 3, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    // drop a partition and query again
    dropPartition(partitioned, key1);
    // drop the dataset
    datasetFramework.deleteInstance(datasetInstanceId);
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, "show tables", false, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Collections.<QueryResult>emptyList());
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) FileSet(co.cask.cdap.api.dataset.lib.FileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionedFileSetProperties(co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) ColumnDesc(co.cask.cdap.proto.ColumnDesc) DatasetId(co.cask.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location)

Example 32 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class HiveExploreServiceTestRun method previewResultsTest.

@Test
public void previewResultsTest() throws Exception {
    DatasetId myTable2 = NAMESPACE_ID.dataset("my_table_2");
    DatasetId myTable3 = NAMESPACE_ID.dataset("my_table_3");
    DatasetId myTable4 = NAMESPACE_ID.dataset("my_table_4");
    DatasetId myTable5 = NAMESPACE_ID.dataset("my_table_5");
    DatasetId myTable6 = NAMESPACE_ID.dataset("my_table_6");
    datasetFramework.addInstance("keyStructValueTable", myTable2, DatasetProperties.EMPTY);
    datasetFramework.addInstance("keyStructValueTable", myTable3, DatasetProperties.EMPTY);
    datasetFramework.addInstance("keyStructValueTable", myTable4, DatasetProperties.EMPTY);
    datasetFramework.addInstance("keyStructValueTable", myTable5, DatasetProperties.EMPTY);
    datasetFramework.addInstance("keyStructValueTable", myTable6, DatasetProperties.EMPTY);
    try {
        QueryHandle handle = exploreService.execute(NAMESPACE_ID, "show tables");
        QueryStatus status = waitForCompletionStatus(handle, 200, TimeUnit.MILLISECONDS, 50);
        Assert.assertEquals(QueryStatus.OpStatus.FINISHED, status.getStatus());
        List<QueryResult> firstPreview = exploreService.previewResults(handle);
        Assert.assertEquals(ImmutableList.of(new QueryResult(ImmutableList.<Object>of(MY_TABLE_NAME)), new QueryResult(ImmutableList.<Object>of(getDatasetHiveName(myTable2))), new QueryResult(ImmutableList.<Object>of(getDatasetHiveName(myTable3))), new QueryResult(ImmutableList.<Object>of(getDatasetHiveName(myTable4))), new QueryResult(ImmutableList.<Object>of(getDatasetHiveName(myTable5)))), firstPreview);
        // test that preview results do not change even when the query cursor is updated by nextResults
        List<QueryResult> endResults = exploreService.nextResults(handle, 100);
        Assert.assertEquals(ImmutableList.of(new QueryResult(ImmutableList.<Object>of(getDatasetHiveName(myTable6)))), endResults);
        List<QueryResult> secondPreview = exploreService.previewResults(handle);
        Assert.assertEquals(firstPreview, secondPreview);
        Assert.assertEquals(ImmutableList.<QueryResult>of(), exploreService.nextResults(handle, 100));
        try {
            // All results are fetched, query should be inactive now
            exploreService.previewResults(handle);
            Assert.fail("HandleNotFoundException expected - query should be inactive.");
        } catch (HandleNotFoundException e) {
            Assert.assertTrue(e.isInactive());
        // Expected exception
        }
        // now test preview on a query that doesn't return any results
        handle = exploreService.execute(NAMESPACE_ID, "select * from " + getDatasetHiveName(myTable2));
        status = waitForCompletionStatus(handle, 200, TimeUnit.MILLISECONDS, 50);
        Assert.assertEquals(QueryStatus.OpStatus.FINISHED, status.getStatus());
        Assert.assertTrue(exploreService.previewResults(handle).isEmpty());
        // calling preview again should return the same thing. it should not throw an exception
        Assert.assertTrue(exploreService.previewResults(handle).isEmpty());
    } finally {
        datasetFramework.deleteInstance(myTable2);
        datasetFramework.deleteInstance(myTable3);
        datasetFramework.deleteInstance(myTable4);
        datasetFramework.deleteInstance(myTable5);
        datasetFramework.deleteInstance(myTable6);
    }
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) QueryHandle(co.cask.cdap.proto.QueryHandle) QueryStatus(co.cask.cdap.proto.QueryStatus) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 33 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class ExploreMetadataTestRun method testGetColumns.

@Test
public void testGetColumns() throws Exception {
    ArrayList<ColumnDesc> expectedColumnDescs = Lists.newArrayList(new ColumnDesc("TABLE_CAT", "STRING", 1, "Catalog name. NULL if not applicable"), new ColumnDesc("TABLE_SCHEM", "STRING", 2, "Schema name"), new ColumnDesc("TABLE_NAME", "STRING", 3, "Table name"), new ColumnDesc("COLUMN_NAME", "STRING", 4, "Column name"), new ColumnDesc("DATA_TYPE", "INT", 5, "SQL type from java.sql.Types"), new ColumnDesc("TYPE_NAME", "STRING", 6, "Data source dependent type name, " + "for a UDT the type name is fully qualified"), new ColumnDesc("COLUMN_SIZE", "INT", 7, "Column size. For char or date types" + " this is the maximum number of characters, for numeric or decimal" + " types this is precision."), new ColumnDesc("BUFFER_LENGTH", "TINYINT", 8, "Unused"), new ColumnDesc("DECIMAL_DIGITS", "INT", 9, "The number of fractional digits"), new ColumnDesc("NUM_PREC_RADIX", "INT", 10, "Radix (typically either 10 or 2)"), new ColumnDesc("NULLABLE", "INT", 11, "Is NULL allowed"), new ColumnDesc("REMARKS", "STRING", 12, "Comment describing column (may be null)"), new ColumnDesc("COLUMN_DEF", "STRING", 13, "Default value (may be null)"), new ColumnDesc("SQL_DATA_TYPE", "INT", 14, "Unused"), new ColumnDesc("SQL_DATETIME_SUB", "INT", 15, "Unused"), new ColumnDesc("CHAR_OCTET_LENGTH", "INT", 16, "For char types the maximum number of bytes in the column"), new ColumnDesc("ORDINAL_POSITION", "INT", 17, "Index of column in table (starting at 1)"), new ColumnDesc("IS_NULLABLE", "STRING", 18, "\"NO\" means column definitely does not " + "allow NULL values; \"YES\" means the column might allow NULL values. " + "An empty string means nobody knows."), new ColumnDesc("SCOPE_CATALOG", "STRING", 19, "Catalog of table that is the scope " + "of a reference attribute (null if DATA_TYPE isn't REF)"), new ColumnDesc("SCOPE_SCHEMA", "STRING", 20, "Schema of table that is the scope of a " + "reference attribute (null if the DATA_TYPE isn't REF)"), new ColumnDesc("SCOPE_TABLE", "STRING", 21, "Table name that this the scope " + "of a reference attribure (null if the DATA_TYPE isn't REF)"), new ColumnDesc("SOURCE_DATA_TYPE", "SMALLINT", 22, "Source type of a distinct type " + "or user-generated Ref type, SQL type from java.sql.Types " + "(null if DATA_TYPE isn't DISTINCT or user-generated REF)"), new ColumnDesc("IS_AUTO_INCREMENT", "STRING", 23, "Indicates whether this column is auto incremented."));
    // Get all columns
    ListenableFuture<ExploreExecutionResult> future = getExploreClient().columns(null, null, "%", "%");
    List<QueryResult> expectedColumns = Lists.newArrayList(getExpectedColumns(NAMESPACE_DATABASE));
    expectedColumns.addAll(getExpectedColumns(OTHER_NAMESPACE_DATABASE));
    assertStatementResult(future, true, expectedColumnDescs, expectedColumns);
    // Get all columns in a namespace
    future = getExploreClient().columns(null, OTHER_NAMESPACE_ID.getNamespace(), "%", "%");
    assertStatementResult(future, true, expectedColumnDescs, getExpectedColumns(OTHER_NAMESPACE_DATABASE));
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)

Example 34 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class ExploreResultSetTest method testResultSet.

@Test
public void testResultSet() throws Exception {
    ExploreClient exploreClient = new MockExploreClient(ImmutableMap.of("mock_query", (List<ColumnDesc>) Lists.newArrayList(new ColumnDesc("column1", "STRING", 1, ""), new ColumnDesc("column2", "int", 2, ""), new ColumnDesc("column3", "char", 3, ""), new ColumnDesc("column4", "float", 4, ""), new ColumnDesc("column5", "double", 5, ""), new ColumnDesc("column6", "boolean", 6, ""), new ColumnDesc("column7", "tinyint", 7, ""), new ColumnDesc("column8", "smallint", 8, ""), new ColumnDesc("column9", "bigint", 9, ""), new ColumnDesc("column10", "date", 10, ""), new ColumnDesc("column11", "timestamp", 11, ""), new ColumnDesc("column12", "decimal", 12, ""), new ColumnDesc("column14", "map<string,string>", 13, ""), new ColumnDesc("column15", "array<string>", 14, ""), new ColumnDesc("column16", "struct<name:string,attr:string>", 15, ""))), ImmutableMap.of("mock_query", (List<QueryResult>) Lists.newArrayList(new QueryResult(ImmutableList.<Object>of("value1", 1, "c", 0.1f, 0.2d, true, 0x1, (short) 2, (long) 10, "2014-06-20", "2014-06-20 07:37:00", "1000000000", "\"{\"key1\":\"value1\"}", "[\"a\",\"b\",\"c\"]", "{\"name\":\"first\",\"attr\":\"second\"}")))));
    ResultSet resultSet = new ExploreResultSet(exploreClient.submit(new NamespaceId(ns), "mock_query").get(), new ExploreStatement(null, exploreClient, ns), 0);
    Assert.assertTrue(resultSet.next());
    Assert.assertEquals(resultSet.getObject(1), resultSet.getObject("column1"));
    Assert.assertEquals("value1", resultSet.getString(1));
    Assert.assertEquals(1, resultSet.getInt(2));
    Assert.assertEquals("c", resultSet.getString(3));
    Assert.assertEquals(0.1f, resultSet.getFloat(4), 0.01);
    Assert.assertEquals(0.2d, resultSet.getDouble(5), 0.01);
    Assert.assertEquals(true, resultSet.getBoolean(6));
    Assert.assertEquals(0x1, resultSet.getByte(7));
    Assert.assertEquals(2, resultSet.getShort(8));
    Assert.assertEquals(10, resultSet.getLong(9));
    Assert.assertEquals(Date.valueOf("2014-06-20"), resultSet.getDate(10));
    Assert.assertEquals(Timestamp.valueOf("2014-06-20 07:37:00"), resultSet.getTimestamp(11));
    Assert.assertEquals(new BigDecimal("1000000000"), resultSet.getBigDecimal(12));
    Assert.assertEquals("\"{\"key1\":\"value1\"}", resultSet.getString(13));
    Assert.assertEquals("[\"a\",\"b\",\"c\"]", resultSet.getString(14));
    Assert.assertEquals("{\"name\":\"first\",\"attr\":\"second\"}", resultSet.getString(15));
    Assert.assertFalse(resultSet.next());
    Assert.assertFalse(resultSet.next());
    try {
        resultSet.getObject(1);
    } catch (SQLException e) {
    // Expected: no more rows
    }
}
Also used : MockExploreClient(co.cask.cdap.explore.client.MockExploreClient) ExploreClient(co.cask.cdap.explore.client.ExploreClient) QueryResult(co.cask.cdap.proto.QueryResult) SQLException(java.sql.SQLException) MockExploreClient(co.cask.cdap.explore.client.MockExploreClient) ResultSet(java.sql.ResultSet) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) NamespaceId(co.cask.cdap.proto.id.NamespaceId) ColumnDesc(co.cask.cdap.proto.ColumnDesc) BigDecimal(java.math.BigDecimal) Test(org.junit.Test)

Example 35 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class ExploreQueryExecutorHttpHandler method getQueryResultPreview.

@POST
@Path("data/explore/queries/{id}/preview")
public void getQueryResultPreview(HttpRequest request, HttpResponder responder, @PathParam("id") String id) throws ExploreException {
    // NOTE: this call is a POST because it is not idempotent: cursor of results is moved
    try {
        final QueryHandle handle = QueryHandle.fromId(id);
        List<QueryResult> results;
        if (handle.equals(QueryHandle.NO_OP)) {
            results = Lists.newArrayList();
        } else {
            results = doAs(handle, new Callable<List<QueryResult>>() {

                @Override
                public List<QueryResult> call() throws Exception {
                    return exploreService.previewResults(handle);
                }
            });
        }
        responder.sendJson(HttpResponseStatus.OK, GSON.toJson(results));
    } catch (IllegalArgumentException e) {
        LOG.debug("Got exception:", e);
        responder.sendString(HttpResponseStatus.BAD_REQUEST, e.getMessage());
    } catch (SQLException e) {
        LOG.debug("Got exception:", e);
        responder.sendString(HttpResponseStatus.BAD_REQUEST, String.format("[SQLState %s] %s", e.getSQLState(), e.getMessage()));
    } catch (HandleNotFoundException e) {
        if (e.isInactive()) {
            responder.sendString(HttpResponseStatus.CONFLICT, "Preview is unavailable for inactive queries.");
            return;
        }
        responder.sendStatus(HttpResponseStatus.NOT_FOUND);
    }
}
Also used : HandleNotFoundException(co.cask.cdap.explore.service.HandleNotFoundException) QueryResult(co.cask.cdap.proto.QueryResult) SQLException(java.sql.SQLException) QueryHandle(co.cask.cdap.proto.QueryHandle) Callable(java.util.concurrent.Callable) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST)

Aggregations

QueryResult (co.cask.cdap.proto.QueryResult)39 ColumnDesc (co.cask.cdap.proto.ColumnDesc)23 Test (org.junit.Test)18 DatasetId (co.cask.cdap.proto.id.DatasetId)16 ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult)9 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)8 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)7 Location (org.apache.twill.filesystem.Location)7 FileSet (co.cask.cdap.api.dataset.lib.FileSet)6 ImmutableList (com.google.common.collect.ImmutableList)6 SQLException (java.sql.SQLException)6 HandleNotFoundException (co.cask.cdap.explore.service.HandleNotFoundException)5 QueryHandle (co.cask.cdap.proto.QueryHandle)4 StreamId (co.cask.cdap.proto.id.StreamId)4 Schema (co.cask.cdap.api.data.schema.Schema)3 PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)3 PartitionedFileSetProperties (co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties)3 Table (co.cask.cdap.api.dataset.table.Table)3 ExploreClient (co.cask.cdap.explore.client.ExploreClient)3 MockExploreClient (co.cask.cdap.explore.client.MockExploreClient)3