Search in sources :

Example 1 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class ExploreStatementTest method executeTest.

/**
 * Checks that {@link ExploreStatement} hands back an open result set even when a query yields
 * no rows, and that running another query on the same statement closes the previous result set.
 */
@Test
public void executeTest() throws Exception {
    List<ColumnDesc> singleColumn = Lists.newArrayList(new ColumnDesc("column1", "STRING", 1, ""));
    List<QueryResult> noRows = Lists.newArrayList();

    // All four mock queries share the same one-column schema and the same empty row list.
    ImmutableMap<String, List<ColumnDesc>> schemaByQuery = ImmutableMap.of(
        "mock_query_1", singleColumn,
        "mock_query_2", singleColumn,
        "mock_query_3", singleColumn,
        "mock_query_4", singleColumn);
    ImmutableMap<String, List<QueryResult>> rowsByQuery = ImmutableMap.of(
        "mock_query_1", noRows,
        "mock_query_2", noRows,
        "mock_query_3", noRows,
        "mock_query_4", noRows);
    ExploreClient exploreClient = new MockExploreClient(schemaByQuery, rowsByQuery);
    ExploreStatement statement = new ExploreStatement(null, exploreClient, "ns1");

    // execute(): a query without rows must still expose an open, empty ResultSet
    Assert.assertTrue(statement.execute("mock_query_1"));
    ResultSet viaExecute = statement.getResultSet();
    Assert.assertNotNull(viaExecute);
    Assert.assertFalse(viaExecute.isClosed());
    Assert.assertFalse(viaExecute.next());

    // executeQuery(): same expectation
    ResultSet secondResults = statement.executeQuery("mock_query_2");
    Assert.assertNotNull(secondResults);
    Assert.assertFalse(secondResults.isClosed());
    Assert.assertFalse(secondResults.next());

    // Running a new query through executeQuery() must close the previous ResultSet
    ResultSet thirdResults = statement.executeQuery("mock_query_3");
    Assert.assertTrue(secondResults.isClosed());
    Assert.assertNotNull(thirdResults);
    Assert.assertFalse(thirdResults.isClosed());
    Assert.assertFalse(thirdResults.next());

    // ... and so must running one through execute()
    Assert.assertTrue(statement.execute("mock_query_4"));
    Assert.assertTrue(thirdResults.isClosed());
}
Also used : ExploreClient(co.cask.cdap.explore.client.ExploreClient) MockExploreClient(co.cask.cdap.explore.client.MockExploreClient) QueryResult(co.cask.cdap.proto.QueryResult) MockExploreClient(co.cask.cdap.explore.client.MockExploreClient) ResultSet(java.sql.ResultSet) ColumnDesc(co.cask.cdap.proto.ColumnDesc) Test(org.junit.Test)

Example 2 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedAvroSchemaUpdate.

/**
 * Checks that updating the {@code avro.schema.literal} table property of an explorable
 * partitioned file set changes the columns Hive returns for an already existing partition.
 */
@Test
public void testPartitionedAvroSchemaUpdate() throws Exception {
    final DatasetId datasetId = NAMESPACE_ID.dataset("avroupd");
    final String tableName = getDatasetHiveName(datasetId);
    final String avroSerDe = "org.apache.hadoop.hive.serde2.avro.AvroSerDe";
    final String avroInputFormat = "org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat";
    final String avroOutputFormat = "org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat";
    final String schemaProperty = "avro.schema.literal";
    final String selectPartitionFour = "SELECT * FROM " + tableName + " WHERE number=4";

    // create a partitioned file set (single int partition field "number") using SCHEMA
    datasetFramework.addInstance(
        PartitionedFileSet.class.getName(),
        datasetId,
        PartitionedFileSetProperties.builder()
            .setPartitioning(Partitioning.builder().addIntField("number").build())
            .setEnableExploreOnCreate(true)
            .setSerDe(avroSerDe)
            .setExploreInputFormat(avroInputFormat)
            .setExploreOutputFormat(avroOutputFormat)
            .setTableProperty(schemaProperty, SCHEMA.toString())
            .build());

    // obtain the dataset instance so that data can be written through it
    PartitionedFileSet partitioned = datasetFramework.getDataset(datasetId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);

    // write one avro file and register its directory as partition number=4
    Location partitionFile = partitioned.getEmbeddedFileSet().getLocation("file4/nn");
    FileWriterHelper.generateAvroFile(partitionFile.getOutputStream(), "x", 4, 5);
    addPartition(partitioned, PartitionKey.builder().addIntField("number", 4).build(), "file4");

    // with the original schema the query returns key, value and the partition column;
    // the avro file holds a single record with key=x4, value=#4
    List<ColumnDesc> columnsBeforeUpdate = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".value", "STRING", 2, null),
        new ColumnDesc(tableName + ".number", "INT", 3, null));
    List<QueryResult> rowsBeforeUpdate = Lists.newArrayList(
        new QueryResult(Lists.<Object>newArrayList("x4", "#4", 4)));
    runCommand(NAMESPACE_ID, selectPartitionFour, true, columnsBeforeUpdate, rowsBeforeUpdate);

    // update the dataset with identical properties except for the schema, which becomes K_SCHEMA
    datasetFramework.updateInstance(
        datasetId,
        PartitionedFileSetProperties.builder()
            .setPartitioning(Partitioning.builder().addIntField("number").build())
            .setEnableExploreOnCreate(true)
            .setSerDe(avroSerDe)
            .setExploreInputFormat(avroInputFormat)
            .setExploreOutputFormat(avroOutputFormat)
            .setTableProperty(schemaProperty, K_SCHEMA.toString())
            .build());

    // after the update the same query exposes only the key and the partition column
    List<ColumnDesc> columnsAfterUpdate = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".number", "INT", 2, null));
    List<QueryResult> rowsAfterUpdate = Lists.newArrayList(
        new QueryResult(Lists.<Object>newArrayList("x4", 4)));
    runCommand(NAMESPACE_ID, selectPartitionFour, true, columnsAfterUpdate, rowsAfterUpdate);
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) FileSet(co.cask.cdap.api.dataset.lib.FileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) ColumnDesc(co.cask.cdap.proto.ColumnDesc) DatasetId(co.cask.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 3 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testCreateAddAlterDrop.

/**
 * Walks an explorable Avro file set through its life cycle and checks the Hive side after each
 * step: create, add files, disable and re-enable explore, change the schema, and delete.
 *
 * @param dbName if non-null, this Hive database is created up front, set as the explore database
 *               of the dataset, used to qualify queries, and dropped again at the end
 * @param tableName if non-null, set as the explore table name and expected as the Hive table
 *                  name; otherwise the name from {@code getDatasetHiveName} is expected
 * @throws Exception if any dataset operation or Hive command fails
 */
private void testCreateAddAlterDrop(@Nullable String dbName, @Nullable String tableName) throws Exception {
    DatasetId datasetInstanceId = NAMESPACE_ID.dataset("files");
    String hiveTableName = getDatasetHiveName(datasetInstanceId);
    String showTablesCommand = "show tables";
    // The builder is kept (not built yet) because later steps mutate it and rebuild the properties
    FileSetProperties.Builder props = FileSetProperties.builder().setBasePath("myPath").setEnableExploreOnCreate(true).setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe").setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat").setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat").setTableProperty("avro.schema.literal", SCHEMA.toString());
    // a custom table name replaces the default Hive table name
    if (tableName != null) {
        props.setExploreTableName(tableName);
        hiveTableName = tableName;
    }
    // queryTableName is the (possibly database-qualified) name used in SELECT statements;
    // hiveTableName stays unqualified because it is what the result columns are prefixed with
    String queryTableName = hiveTableName;
    if (dbName != null) {
        props.setExploreDatabaseName(dbName);
        runCommand(NAMESPACE_ID, "create database " + dbName, false, null, null);
        showTablesCommand += " in " + dbName;
        queryTableName = dbName + "." + queryTableName;
    }
    // create a plain file set (dataset type "fileSet") with the properties assembled above
    datasetFramework.addInstance("fileSet", datasetInstanceId, props.build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, showTablesCommand, true, null, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));
    // Accessing dataset instance to perform data operations
    FileSet fileSet = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(fileSet);
    // add a file
    FileWriterHelper.generateAvroFile(fileSet.getLocation("file1").getOutputStream(), "a", 0, 3);
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc(hiveTableName + ".key", "STRING", 1, null), new ColumnDesc(hiveTableName + ".value", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("a0", "#0")), new QueryResult(Lists.<Object>newArrayList("a1", "#1")), new QueryResult(Lists.<Object>newArrayList("a2", "#2"))));
    // add another file
    FileWriterHelper.generateAvroFile(fileSet.getLocation("file2").getOutputStream(), "b", 3, 5);
    // verify the total row count across both files (a0..a2 from file1 plus b3, b4 from file2)
    runCommand(NAMESPACE_ID, "SELECT count(*) AS count FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(5L))));
    // disable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(false).build());
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());
    // re-enable explore by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(true).build());
    // verify that we can query again and still see all five rows
    runCommand(NAMESPACE_ID, "SELECT count(*) AS count FROM " + queryTableName, true, Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(5L))));
    // change the explore schema by updating the props
    datasetFramework.updateInstance(datasetInstanceId, props.setTableProperty("avro.schema.literal", K_SCHEMA.toString()).build());
    // verify that the table now exposes only the key column, for the records of both files
    runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName + " ORDER BY key", true, Lists.newArrayList(new ColumnDesc(hiveTableName + ".key", "STRING", 1, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("a0")), new QueryResult(Lists.<Object>newArrayList("a1")), new QueryResult(Lists.<Object>newArrayList("a2")), new QueryResult(Lists.<Object>newArrayList("b3")), new QueryResult(Lists.<Object>newArrayList("b4"))));
    // drop the dataset
    datasetFramework.deleteInstance(datasetInstanceId);
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());
    // drop the database if needed
    if (dbName != null) {
        runCommand(NAMESPACE_ID, "drop database " + dbName, false, null, null);
    }
}
Also used : FileSetProperties(co.cask.cdap.api.dataset.lib.FileSetProperties) PartitionedFileSetProperties(co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties) QueryResult(co.cask.cdap.proto.QueryResult) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) FileSet(co.cask.cdap.api.dataset.lib.FileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) ColumnDesc(co.cask.cdap.proto.ColumnDesc) DatasetId(co.cask.cdap.proto.id.DatasetId)

Example 4 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testOrcFileset.

/**
 * Checks that an explorable file set configured with the ORC SerDe and formats shows up as a
 * Hive table, that rows inserted through Hive can be read back, and that the table is gone once
 * the dataset has been deleted.
 */
@Test
public void testOrcFileset() throws Exception {
    final DatasetId orcDatasetId = NAMESPACE_ID.dataset("orcfiles");
    final String tableName = getDatasetHiveName(orcDatasetId);
    final String showTables = "show tables";
    final String selectAll = "SELECT * FROM " + tableName;
    final String insertRows =
        String.format("insert into table %s values (1, 'samuel'), (2, 'dwayne')", tableName);
    final ColumnDesc tabNameColumn = new ColumnDesc("tab_name", "STRING", 1, "from deserializer");

    // create a plain file set stored as ORC with an explicit (id, name) explore schema
    datasetFramework.addInstance(
        "fileSet",
        orcDatasetId,
        FileSetProperties.builder()
            .setEnableExploreOnCreate(true)
            .setSerDe("org.apache.hadoop.hive.ql.io.orc.OrcSerde")
            .setExploreInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")
            .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")
            .setExploreSchema("id int, name string")
            .build());

    // the hive table must exist now
    runCommand(
        NAMESPACE_ID,
        showTables,
        true,
        Lists.newArrayList(tabNameColumn),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));

    // insert two rows through Hive; wait for the statement to finish, then release its result
    exploreClient.submit(NAMESPACE_ID, insertRows).get().close();

    // both inserted rows must be readable through Hive
    runCommand(
        NAMESPACE_ID,
        selectAll,
        true,
        Lists.newArrayList(
            new ColumnDesc(tableName + ".id", "INT", 1, null),
            new ColumnDesc(tableName + ".name", "STRING", 2, null)),
        Lists.newArrayList(
            new QueryResult(Lists.<Object>newArrayList(1, "samuel")),
            new QueryResult(Lists.<Object>newArrayList(2, "dwayne"))));

    // delete the dataset and make sure the hive table went away with it
    datasetFramework.deleteInstance(orcDatasetId);
    runCommand(
        NAMESPACE_ID,
        showTables,
        false,
        Lists.newArrayList(tabNameColumn),
        Collections.<QueryResult>emptyList());
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 5 with QueryResult

use of co.cask.cdap.proto.QueryResult in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testTimePartitionedFileSet.

/**
 * Exercises the Hive integration of a time-partitioned file set: table creation, adding and
 * dropping partitions, querying with and without a partition filter, and deleting and then
 * re-creating the dataset under the same name.
 */
@Test
public void testTimePartitionedFileSet() throws Exception {
    final DatasetId tpfsId = NAMESPACE_ID.dataset("parts");
    final String tableName = getDatasetHiveName(tpfsId);
    final String datasetType = "timePartitionedFileSet";

    // statements issued more than once
    final String showTables = "show tables";
    final String showPartitions = "show partitions " + tableName;
    final String selectAllOrdered = "SELECT key, value FROM " + tableName + " ORDER BY key, value";
    final String selectHourTwo =
        "SELECT key, value FROM " + tableName + " WHERE hour = 2 ORDER BY key, value";

    // column descriptors expected in the results
    final ColumnDesc tabNameColumn = new ColumnDesc("tab_name", "STRING", 1, "from deserializer");
    final ColumnDesc partitionColumn = new ColumnDesc("partition", "STRING", 1, "from deserializer");
    final ColumnDesc keyColumn = new ColumnDesc("key", "STRING", 1, null);
    final ColumnDesc valueColumn = new ColumnDesc("value", "STRING", 2, null);

    // partition names expected from "show partitions"
    final String hourOne = "year=2014/month=12/day=10/hour=1/minute=0";
    final String hourTwo = "year=2014/month=12/day=10/hour=2/minute=0";
    final String hourThree = "year=2014/month=12/day=10/hour=3/minute=0";

    // the dataset is created twice with identical properties, so keep the builder around
    FileSetProperties.Builder avroProps = FileSetProperties.builder()
        .setBasePath("somePath")
        .setEnableExploreOnCreate(true)
        .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
        .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
        .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
        .setTableProperty("avro.schema.literal", SCHEMA.toString());

    // create a time partitioned file set and check that its hive table exists
    datasetFramework.addInstance(datasetType, tpfsId, avroProps.build());
    runCommand(
        NAMESPACE_ID,
        showTables,
        true,
        Lists.newArrayList(tabNameColumn),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));

    // obtain the dataset instance so that data can be written through it
    TimePartitionedFileSet tpfs = datasetFramework.getDataset(tpfsId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(tpfs);
    Assert.assertTrue(tpfs instanceof TransactionAware);

    // add three partitions; Hive expects a partition to be a directory, so every partition is
    // a directory ("fileN") that contains exactly one file ("nn")
    long firstHour = DATE_FORMAT.parse("12/10/14 1:00 am").getTime();
    long secondHour = DATE_FORMAT.parse("12/10/14 2:00 am").getTime();
    long thirdHour = DATE_FORMAT.parse("12/10/14 3:00 am").getTime();
    Location firstFile = tpfs.getEmbeddedFileSet().getLocation("file1/nn");
    Location secondFile = tpfs.getEmbeddedFileSet().getLocation("file2/nn");
    Location thirdFile = tpfs.getEmbeddedFileSet().getLocation("file3/nn");
    FileWriterHelper.generateAvroFile(firstFile.getOutputStream(), "x", 1, 2);
    FileWriterHelper.generateAvroFile(secondFile.getOutputStream(), "y", 2, 3);
    FileWriterHelper.generateAvroFile(thirdFile.getOutputStream(), "x", 3, 4);
    addTimePartition(tpfs, firstHour, "file1");
    addTimePartition(tpfs, secondHour, "file2");
    addTimePartition(tpfs, thirdHour, "file3");

    // all three partitions must be known to Hive
    runCommand(
        NAMESPACE_ID,
        showPartitions,
        true,
        Lists.newArrayList(partitionColumn),
        Lists.newArrayList(
            new QueryResult(Lists.<Object>newArrayList(hourOne)),
            new QueryResult(Lists.<Object>newArrayList(hourTwo)),
            new QueryResult(Lists.<Object>newArrayList(hourThree))));

    // an unfiltered query returns the records of all partitions
    runCommand(
        NAMESPACE_ID,
        selectAllOrdered,
        true,
        Lists.newArrayList(keyColumn, valueColumn),
        Lists.newArrayList(
            new QueryResult(Lists.<Object>newArrayList("x1", "#1")),
            new QueryResult(Lists.<Object>newArrayList("x3", "#3")),
            new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));

    // filtering on the hour partition column returns only that partition's record
    runCommand(
        NAMESPACE_ID,
        selectHourTwo,
        true,
        Lists.newArrayList(keyColumn, valueColumn),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));

    // remove the 2:00 am partition
    dropTimePartition(tpfs, secondHour);

    // its record must no longer be returned ...
    runCommand(
        NAMESPACE_ID,
        selectAllOrdered,
        true,
        Lists.newArrayList(keyColumn, valueColumn),
        Lists.newArrayList(
            new QueryResult(Lists.<Object>newArrayList("x1", "#1")),
            new QueryResult(Lists.<Object>newArrayList("x3", "#3"))));

    // ... and the partition itself must be gone from Hive
    runCommand(
        NAMESPACE_ID,
        showPartitions,
        true,
        Lists.newArrayList(partitionColumn),
        Lists.newArrayList(
            new QueryResult(Lists.<Object>newArrayList(hourOne)),
            new QueryResult(Lists.<Object>newArrayList(hourThree))));

    // delete the dataset and make sure the hive table went away with it
    datasetFramework.deleteInstance(tpfsId);
    runCommand(
        NAMESPACE_ID,
        showTables,
        false,
        Lists.newArrayList(tabNameColumn),
        Collections.<QueryResult>emptyList());

    // re-create the dataset under the same name; the hive table must come back
    datasetFramework.addInstance(datasetType, tpfsId, avroProps.build());
    runCommand(
        NAMESPACE_ID,
        showTables,
        true,
        Lists.newArrayList(tabNameColumn),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) TransactionAware(org.apache.tephra.TransactionAware) ColumnDesc(co.cask.cdap.proto.ColumnDesc) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) DatasetId(co.cask.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Aggregations

QueryResult (co.cask.cdap.proto.QueryResult)39 ColumnDesc (co.cask.cdap.proto.ColumnDesc)23 Test (org.junit.Test)18 DatasetId (co.cask.cdap.proto.id.DatasetId)16 ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult)9 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)8 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)7 Location (org.apache.twill.filesystem.Location)7 FileSet (co.cask.cdap.api.dataset.lib.FileSet)6 ImmutableList (com.google.common.collect.ImmutableList)6 SQLException (java.sql.SQLException)6 HandleNotFoundException (co.cask.cdap.explore.service.HandleNotFoundException)5 QueryHandle (co.cask.cdap.proto.QueryHandle)4 StreamId (co.cask.cdap.proto.id.StreamId)4 Schema (co.cask.cdap.api.data.schema.Schema)3 PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)3 PartitionedFileSetProperties (co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties)3 Table (co.cask.cdap.api.dataset.table.Table)3 ExploreClient (co.cask.cdap.explore.client.ExploreClient)3 MockExploreClient (co.cask.cdap.explore.client.MockExploreClient)3