Example 1 with ColumnDesc

Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

From the class GenerateClientUsageExample, method queryClient.

public void queryClient() throws Exception {
    // Construct the client used to interact with CDAP
    QueryClient queryClient = new QueryClient(clientConfig);
    // Perform an ad-hoc query using the Purchase example
    ListenableFuture<ExploreExecutionResult> resultFuture = queryClient.execute(NamespaceId.DEFAULT, "SELECT * FROM dataset_history WHERE customer IN ('Alice','Bob')");
    ExploreExecutionResult results = resultFuture.get();
    // Fetch the result schema and build a "name: type" header, ordered by column position
    List<ColumnDesc> schema = results.getResultSchema();
    String[] header = new String[schema.size()];
    for (int i = 0; i < header.length; i++) {
        ColumnDesc column = schema.get(i);
        // Hive columns start at 1
        int index = column.getPosition() - 1;
        header[index] = column.getName() + ": " + column.getType();
    }
}
Also used : ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) QueryClient(co.cask.cdap.client.QueryClient)
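
The example above builds the header but never reads the rows. As a rough follow-on sketch (not part of the original example, and continuing inside queryClient() after the header loop), the remaining rows could be drained from the same ExploreExecutionResult; the hasNext()/next() iteration and the QueryResult.getColumns() accessor are assumed from the client API shown here and should be checked against the CDAP version in use.

    // Hedged sketch: drain the remaining rows from the result above.
    // Assumes ExploreExecutionResult iterates over QueryResult and that
    // QueryResult.getColumns() returns the row values in column order.
    while (results.hasNext()) {
        QueryResult row = results.next();
        List<Object> columns = row.getColumns();
        for (int i = 0; i < header.length; i++) {
            System.out.println(header[i] + " = " + columns.get(i));
        }
    }
    // Release server-side resources once done
    results.close();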

Example 2 with ColumnDesc

Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

From the class ExploreStatementTest, method executeTest.

@Test
public void executeTest() throws Exception {
    List<ColumnDesc> columnDescriptions = Lists.newArrayList(new ColumnDesc("column1", "STRING", 1, ""));
    List<QueryResult> queryResults = Lists.newArrayList();
    ExploreClient exploreClient = new MockExploreClient(
        ImmutableMap.of("mock_query_1", columnDescriptions,
                        "mock_query_2", columnDescriptions,
                        "mock_query_3", columnDescriptions,
                        "mock_query_4", columnDescriptions),
        ImmutableMap.of("mock_query_1", queryResults,
                        "mock_query_2", queryResults,
                        "mock_query_3", queryResults,
                        "mock_query_4", queryResults));
    // Make sure an empty query still has a ResultSet associated to it
    ExploreStatement statement = new ExploreStatement(null, exploreClient, "ns1");
    Assert.assertTrue(statement.execute("mock_query_1"));
    ResultSet rs = statement.getResultSet();
    Assert.assertNotNull(rs);
    Assert.assertFalse(rs.isClosed());
    Assert.assertFalse(rs.next());
    rs = statement.executeQuery("mock_query_2");
    Assert.assertNotNull(rs);
    Assert.assertFalse(rs.isClosed());
    Assert.assertFalse(rs.next());
    // Make sure subsequent calls to an execute method close the previous results
    ResultSet rs2 = statement.executeQuery("mock_query_3");
    Assert.assertTrue(rs.isClosed());
    Assert.assertNotNull(rs2);
    Assert.assertFalse(rs2.isClosed());
    Assert.assertFalse(rs2.next());
    Assert.assertTrue(statement.execute("mock_query_4"));
    Assert.assertTrue(rs2.isClosed());
}
Also used : ExploreClient(co.cask.cdap.explore.client.ExploreClient) MockExploreClient(co.cask.cdap.explore.client.MockExploreClient) QueryResult(co.cask.cdap.proto.QueryResult) ResultSet(java.sql.ResultSet) ColumnDesc(co.cask.cdap.proto.ColumnDesc) Test(org.junit.Test)
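
For reference, the four-argument ColumnDesc constructor used in the test above maps directly onto the accessors exercised in Example 1. A minimal sketch, assuming only the getName(), getType(), and getPosition() getters seen in these examples:

    // Hedged sketch: a ColumnDesc carries the column name, Hive type,
    // 1-based position, and an optional comment (empty here).
    ColumnDesc desc = new ColumnDesc("column1", "STRING", 1, "");
    String label = desc.getName() + ": " + desc.getType();   // "column1: STRING"
    int zeroBasedIndex = desc.getPosition() - 1;              // Hive positions start at 1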

Example 3 with ColumnDesc

Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

From the class StaticEmptyExploreResultSet, method getMetaData.

@Override
public ResultSetMetaData getMetaData() throws SQLException {
    if (isClosed()) {
        throw new SQLException("Resultset is closed");
    }
    ImmutableList.Builder<ColumnDesc> builder = ImmutableList.builder();
    for (int i = 0; i < schema.size(); i++) {
        ImmutablePair<String, String> pair = schema.get(i);
        // ColumnDesc positions are 1-based, so use i + 1
        builder.add(new ColumnDesc(pair.getFirst(), pair.getSecond(), i + 1, ""));
    }
    return new ExploreResultSetMetaData(builder.build());
}
Also used : SQLException(java.sql.SQLException) ImmutableList(com.google.common.collect.ImmutableList) ColumnDesc(co.cask.cdap.proto.ColumnDesc)
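
On the consuming side, the metadata built here surfaces through the standard java.sql.ResultSetMetaData interface. A minimal sketch, assuming an already-open ResultSet rs backed by this Explore JDBC layer (the method name printColumns is illustrative only; ResultSetMetaData comes from java.sql):

    // Hedged sketch: read back the column descriptions via plain JDBC.
    // JDBC column indexes are 1-based, matching the ColumnDesc positions above.
    void printColumns(ResultSet rs) throws SQLException {
        ResultSetMetaData meta = rs.getMetaData();
        for (int i = 1; i <= meta.getColumnCount(); i++) {
            System.out.println(meta.getColumnName(i) + ": " + meta.getColumnTypeName(i));
        }
    }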

Example 4 with ColumnDesc

Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

From the class HiveExploreServiceFileSetTestRun, method testPartitionedAvroSchemaUpdate.

@Test
public void testPartitionedAvroSchemaUpdate() throws Exception {
    final DatasetId datasetId = NAMESPACE_ID.dataset("avroupd");
    final String tableName = getDatasetHiveName(datasetId);
    // create a partitioned file set, partitioned by an int field "number"
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), datasetId,
        PartitionedFileSetProperties.builder()
            .setPartitioning(Partitioning.builder().addIntField("number").build())
            .setEnableExploreOnCreate(true)
            .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
            .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
            .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
            .setTableProperty("avro.schema.literal", SCHEMA.toString())
            .build());
    // Accessing dataset instance to perform data operations
    PartitionedFileSet partitioned = datasetFramework.getDataset(datasetId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    FileSet fileSet = partitioned.getEmbeddedFileSet();
    // add a partition
    Location location4 = fileSet.getLocation("file4/nn");
    FileWriterHelper.generateAvroFile(location4.getOutputStream(), "x", 4, 5);
    addPartition(partitioned, PartitionKey.builder().addIntField("number", 4).build(), "file4");
    // new partition should have new format, validate with query
    List<ColumnDesc> expectedColumns = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".value", "STRING", 2, null),
        new ColumnDesc(tableName + ".number", "INT", 3, null));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName + " WHERE number=4", true, expectedColumns,
        // avro file has key=x4, value=#4
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x4", "#4", 4))));
    // update the partitioned file set
    datasetFramework.updateInstance(datasetId,
        PartitionedFileSetProperties.builder()
            .setPartitioning(Partitioning.builder().addIntField("number").build())
            .setEnableExploreOnCreate(true)
            .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
            .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
            .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
            .setTableProperty("avro.schema.literal", K_SCHEMA.toString())
            .build());
    expectedColumns = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".number", "INT", 2, null));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName + " WHERE number=4", true, expectedColumns,
        // avro file has key=x4, value=#4; only key and number are selected after the update
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x4", 4))));
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) FileSet(co.cask.cdap.api.dataset.lib.FileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) ColumnDesc(co.cask.cdap.proto.ColumnDesc) DatasetId(co.cask.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 5 with ColumnDesc

Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

From the class HiveExploreServiceFileSetTestRun, method testCreateAddAlterDrop.

private void testCreateAddAlterDrop(@Nullable String dbName, @Nullable String tableName) throws Exception {
    DatasetId datasetInstanceId = NAMESPACE_ID.dataset("files");
    String hiveTableName = getDatasetHiveName(datasetInstanceId);
    String showTablesCommand = "show tables";
    FileSetProperties.Builder props = FileSetProperties.builder()
        .setBasePath("myPath")
        .setEnableExploreOnCreate(true)
        .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
        .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
        .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
        .setTableProperty("avro.schema.literal", SCHEMA.toString());
    if (tableName != null) {
        props.setExploreTableName(tableName);
        hiveTableName = tableName;
    }
    String queryTableName = hiveTableName;
    if (dbName != null) {
        props.setExploreDatabaseName(dbName);
        runCommand(NAMESPACE_ID, "create database " + dbName, false, null, null);
        showTablesCommand += " in " + dbName;
        queryTableName = dbName + "." + queryTableName;
    }
    // create a file set instance with the properties built above
    datasetFramework.addInstance("fileSet", datasetInstanceId, props.build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
    // Accessing dataset instance to perform data operations
    FileSet fileSet = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(fileSet);
    // add a file
    FileWriterHelper.generateAvroFile(fileSet.getLocation("file1").getOutputStream(), "a", 0, 3);
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc(hiveTableName + ".key", "STRING", 1, null), new ColumnDesc(hiveTableName + ".value", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("a0", "#0")), new QueryResult(Lists.<Object>newArrayList("a1", "#1")), new QueryResult(Lists.<Object>newArrayList("a2", "#2"))));
    // add another file
    FileWriterHelper.generateAvroFile(fileSet.getLocation("file2").getOutputStream(), "b", 3, 5);
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT count(*) AS count FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(5L))));
    // disable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(false).build());
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());
    // re-enable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(true).build());
    // verify that we can query again
    runCommand(NAMESPACE_ID, "SELECT count(*) AS count FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(5L))));
    // change the explore schema by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setTableProperty("avro.schema.literal", K_SCHEMA.toString()).build());
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName + " ORDER BY key", true, Lists.newArrayList(new ColumnDesc(hiveTableName + ".key", "STRING", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("a0")), new QueryResult(Lists.<Object>newArrayList("a1")), new QueryResult(Lists.<Object>newArrayList("a2")), new QueryResult(Lists.<Object>newArrayList("b3")), new QueryResult(Lists.<Object>newArrayList("b4"))));
    // drop the dataset
    datasetFramework.deleteInstance(datasetInstanceId);
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());
    // drop the database if needed
    if (dbName != null) {
        runCommand(NAMESPACE_ID, "drop database " + dbName, false, null, null);
    }
}
Also used : FileSetProperties(co.cask.cdap.api.dataset.lib.FileSetProperties) PartitionedFileSetProperties(co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties) QueryResult(co.cask.cdap.proto.QueryResult) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) FileSet(co.cask.cdap.api.dataset.lib.FileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) ColumnDesc(co.cask.cdap.proto.ColumnDesc) DatasetId(co.cask.cdap.proto.id.DatasetId)
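
Purely for illustration, the same count query this test runs through the runCommand() helper could be issued with the QueryClient API from Example 1. A hedged sketch, assuming queryClient is configured as in Example 1 and queryTableName is in scope as in this test:

    // Hedged sketch (illustration only): issue the count query via QueryClient,
    // using the execute()/get() pattern shown in Example 1.
    ExploreExecutionResult countResult =
        queryClient.execute(NamespaceId.DEFAULT, "SELECT count(*) AS count FROM " + queryTableName).get();
    // Expect a single BIGINT column named "count", mirroring the expected ColumnDesc above
    List<ColumnDesc> countSchema = countResult.getResultSchema();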

Aggregations

ColumnDesc (co.cask.cdap.proto.ColumnDesc): 38 usages
QueryResult (co.cask.cdap.proto.QueryResult): 23 usages
Test (org.junit.Test): 21 usages
DatasetId (co.cask.cdap.proto.id.DatasetId): 14 usages
ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult): 13 usages
TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 8 usages
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 7 usages
SQLException (java.sql.SQLException): 7 usages
Location (org.apache.twill.filesystem.Location): 7 usages
FileSet (co.cask.cdap.api.dataset.lib.FileSet): 6 usages
Schema (co.cask.cdap.api.data.schema.Schema): 4 usages
ExploreClient (co.cask.cdap.explore.client.ExploreClient): 4 usages
MockExploreClient (co.cask.cdap.explore.client.MockExploreClient): 4 usages
QueryStatus (co.cask.cdap.proto.QueryStatus): 4 usages
StreamId (co.cask.cdap.proto.id.StreamId): 4 usages
ResultSet (java.sql.ResultSet): 4 usages
PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 3 usages
PartitionedFileSetProperties (co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties): 3 usages
Table (co.cask.cdap.api.dataset.table.Table): 3 usages
QueryHandle (co.cask.cdap.proto.QueryHandle): 3 usages