Search in sources :

Example 1 with QueryResult

use of io.cdap.cdap.proto.QueryResult in project cdap by caskdata.

the class ExploreMetadataTestRun method testGetColumns.

@Test
public void testGetColumns() throws Exception {
    ArrayList<ColumnDesc> expectedColumnDescs = Lists.newArrayList(new ColumnDesc("TABLE_CAT", "STRING", 1, "Catalog name. NULL if not applicable"), new ColumnDesc("TABLE_SCHEM", "STRING", 2, "Schema name"), new ColumnDesc("TABLE_NAME", "STRING", 3, "Table name"), new ColumnDesc("COLUMN_NAME", "STRING", 4, "Column name"), new ColumnDesc("DATA_TYPE", "INT", 5, "SQL type from java.sql.Types"), new ColumnDesc("TYPE_NAME", "STRING", 6, "Data source dependent type name, " + "for a UDT the type name is fully qualified"), new ColumnDesc("COLUMN_SIZE", "INT", 7, "Column size. For char or date types" + " this is the maximum number of characters, for numeric or decimal" + " types this is precision."), new ColumnDesc("BUFFER_LENGTH", "TINYINT", 8, "Unused"), new ColumnDesc("DECIMAL_DIGITS", "INT", 9, "The number of fractional digits"), new ColumnDesc("NUM_PREC_RADIX", "INT", 10, "Radix (typically either 10 or 2)"), new ColumnDesc("NULLABLE", "INT", 11, "Is NULL allowed"), new ColumnDesc("REMARKS", "STRING", 12, "Comment describing column (may be null)"), new ColumnDesc("COLUMN_DEF", "STRING", 13, "Default value (may be null)"), new ColumnDesc("SQL_DATA_TYPE", "INT", 14, "Unused"), new ColumnDesc("SQL_DATETIME_SUB", "INT", 15, "Unused"), new ColumnDesc("CHAR_OCTET_LENGTH", "INT", 16, "For char types the maximum number of bytes in the column"), new ColumnDesc("ORDINAL_POSITION", "INT", 17, "Index of column in table (starting at 1)"), new ColumnDesc("IS_NULLABLE", "STRING", 18, "\"NO\" means column definitely does not " + "allow NULL values; \"YES\" means the column might allow NULL values. 
" + "An empty string means nobody knows."), new ColumnDesc("SCOPE_CATALOG", "STRING", 19, "Catalog of table that is the scope " + "of a reference attribute (null if DATA_TYPE isn't REF)"), new ColumnDesc("SCOPE_SCHEMA", "STRING", 20, "Schema of table that is the scope of a " + "reference attribute (null if the DATA_TYPE isn't REF)"), new ColumnDesc("SCOPE_TABLE", "STRING", 21, "Table name that this the scope " + "of a reference attribure (null if the DATA_TYPE isn't REF)"), new ColumnDesc("SOURCE_DATA_TYPE", "SMALLINT", 22, "Source type of a distinct type " + "or user-generated Ref type, SQL type from java.sql.Types " + "(null if DATA_TYPE isn't DISTINCT or user-generated REF)"), new ColumnDesc("IS_AUTO_INCREMENT", "STRING", 23, "Indicates whether this column is auto incremented."));
    // Get all columns
    ListenableFuture<ExploreExecutionResult> future = getExploreClient().columns(null, null, "%", "%");
    List<QueryResult> expectedColumns = Lists.newArrayList(getExpectedColumns(NAMESPACE_DATABASE));
    expectedColumns.addAll(getExpectedColumns(OTHER_NAMESPACE_DATABASE));
    assertStatementResult(future, true, expectedColumnDescs, expectedColumns);
    // Get all columns in a namespace
    future = getExploreClient().columns(null, OTHER_NAMESPACE_ID.getNamespace(), "%", "%");
    assertStatementResult(future, true, expectedColumnDescs, getExpectedColumns(OTHER_NAMESPACE_DATABASE));
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) ExploreExecutionResult(io.cdap.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)

Example 2 with QueryResult

use of io.cdap.cdap.proto.QueryResult in project cdap by caskdata.

the class BaseHiveExploreServiceTest method trimColumnValues.

/**
 * Copies up to 100 results from the iterator into a new list, normalising column values so
 * they are easier to compare in tests.
 *
 * <p>{@code String} columns are trimmed, {@code Double} columns are rounded to 4 decimal
 * places, and every other column value is passed through unchanged.
 *
 * @param results iterator over the query results to normalise
 * @return a new list holding one rebuilt {@code QueryResult} per consumed input result
 */
protected static List<QueryResult> trimColumnValues(Iterator<QueryResult> results) {
    List<QueryResult> normalizedResults = Lists.newArrayList();
    // At most 100 results are consumed; hasNext() is deliberately evaluated before the limit check.
    for (int consumed = 0; results.hasNext() && consumed < 100; consumed++) {
        QueryResult current = results.next();
        List<Object> normalizedColumns = Lists.newArrayList();
        for (Object column : current.getColumns()) {
            Object normalized = column;
            if (column instanceof String) {
                normalized = ((String) column).trim();
            } else if (column instanceof Double) {
                // Test cases therefore only see 4 decimals for Double values.
                normalized = (double) Math.round((Double) column * 10000) / 10000;
            }
            normalizedColumns.add(normalized);
        }
        normalizedResults.add(new QueryResult(normalizedColumns));
    }
    return normalizedResults;
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult)

Example 3 with QueryResult

use of io.cdap.cdap.proto.QueryResult in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testTimePartitionedFileSet.

/**
 * Walks a time-partitioned file set through its Hive lifecycle: create the dataset, add three
 * hourly partitions, query them, drop one partition, delete the dataset, and finally create it
 * again, checking the Hive-visible state after each step.
 *
 * @throws Exception if any dataset operation or query assertion fails
 */
@Test
public void testTimePartitionedFileSet() throws Exception {
    final DatasetId partsId = NAMESPACE_ID.dataset("parts");
    final String hiveTable = getDatasetHiveName(partsId);

    // Create an explorable time-partitioned file set that uses the Avro SerDe and formats.
    datasetFramework.addInstance(
        "timePartitionedFileSet",
        partsId,
        FileSetProperties.builder()
            .setBasePath("somePath")
            .setEnableExploreOnCreate(true)
            .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
            .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
            .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
            .setTableProperty("avro.schema.literal", SCHEMA.toString())
            .build());

    // The Hive table for the file set must now be listed.
    runCommand(
        NAMESPACE_ID,
        "show tables",
        true,
        Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTable))));

    // Obtain the dataset instance so that data can be written through it.
    TimePartitionedFileSet timePartitioned =
        datasetFramework.getDataset(partsId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(timePartitioned);
    Assert.assertTrue(timePartitioned instanceof TransactionAware);

    // Add three partitions. Hive expects a partition to be a directory, so each partition
    // is a directory containing a single file.
    long firstHour = DATE_FORMAT.parse("12/10/14 1:00 am").getTime();
    long secondHour = DATE_FORMAT.parse("12/10/14 2:00 am").getTime();
    long thirdHour = DATE_FORMAT.parse("12/10/14 3:00 am").getTime();
    Location firstFile = timePartitioned.getEmbeddedFileSet().getLocation("file1/nn");
    Location secondFile = timePartitioned.getEmbeddedFileSet().getLocation("file2/nn");
    Location thirdFile = timePartitioned.getEmbeddedFileSet().getLocation("file3/nn");
    FileWriterHelper.generateAvroFile(firstFile.getOutputStream(), "x", 1, 2);
    FileWriterHelper.generateAvroFile(secondFile.getOutputStream(), "y", 2, 3);
    FileWriterHelper.generateAvroFile(thirdFile.getOutputStream(), "x", 3, 4);
    addTimePartition(timePartitioned, firstHour, "file1");
    addTimePartition(timePartitioned, secondHour, "file2");
    addTimePartition(timePartitioned, thirdHour, "file3");

    // All three partitions must be visible in Hive.
    runCommand(
        NAMESPACE_ID,
        "show partitions " + hiveTable,
        true,
        Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")),
        Lists.newArrayList(
            new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")),
            new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=2/minute=0")),
            new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));

    // Querying the whole table returns the key-values of every partition.
    runCommand(
        NAMESPACE_ID,
        "SELECT key, value FROM " + hiveTable + " ORDER BY key, value",
        true,
        Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)),
        Lists.newArrayList(
            new QueryResult(Lists.<Object>newArrayList("x1", "#1")),
            new QueryResult(Lists.<Object>newArrayList("x3", "#3")),
            new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));

    // Filtering on the hour partition column returns only that partition's key-values.
    runCommand(
        NAMESPACE_ID,
        "SELECT key, value FROM " + hiveTable + " WHERE hour = 2 ORDER BY key, value",
        true,
        Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));

    // Remove the second partition.
    dropTimePartition(timePartitioned, secondHour);

    // The dropped partition's key-values no longer show up in a full-table query.
    runCommand(
        NAMESPACE_ID,
        "SELECT key, value FROM " + hiveTable + " ORDER BY key, value",
        true,
        Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)),
        Lists.newArrayList(
            new QueryResult(Lists.<Object>newArrayList("x1", "#1")),
            new QueryResult(Lists.<Object>newArrayList("x3", "#3"))));

    // The partition itself is gone from Hive as well.
    runCommand(
        NAMESPACE_ID,
        "show partitions " + hiveTable,
        true,
        Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")),
        Lists.newArrayList(
            new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")),
            new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));

    // Drop the dataset; afterwards the Hive table must be gone.
    datasetFramework.deleteInstance(partsId);
    runCommand(
        NAMESPACE_ID,
        "show tables",
        false,
        Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
        Collections.<QueryResult>emptyList());

    // Create the dataset again with the same properties.
    datasetFramework.addInstance(
        "timePartitionedFileSet",
        partsId,
        FileSetProperties.builder()
            .setBasePath("somePath")
            .setEnableExploreOnCreate(true)
            .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
            .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
            .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
            .setTableProperty("avro.schema.literal", SCHEMA.toString())
            .build());

    // The Hive table must be listed again for the re-created file set.
    runCommand(
        NAMESPACE_ID,
        "show tables",
        true,
        Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTable))));
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) TransactionAware(org.apache.tephra.TransactionAware) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 4 with QueryResult

use of io.cdap.cdap.proto.QueryResult in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedTextFile.

/**
 * Mainly covers the support for different text formats of an explorable partitioned file set;
 * other features such as partitioning are covered by separate tests.
 *
 * @param name dataset name, also used as the base path of the file set
 * @param format explore format to configure on the dataset
 * @param delim value for the "delimiter" explore format property, or null to leave it unset
 * @param fileDelim delimiter handed to the helper that writes the text file
 * @throws Exception if any dataset operation or query assertion fails
 */
private void testPartitionedTextFile(String name, String format, String delim, String fileDelim) throws Exception {
    final DatasetId textDatasetId = NAMESPACE_ID.dataset(name);
    final String hiveTable = getDatasetHiveName(textDatasetId);

    // Configure a file set partitioned by a single int field called "number".
    PartitionedFileSetProperties.Builder propertiesBuilder =
        (PartitionedFileSetProperties.Builder) PartitionedFileSetProperties.builder()
            .setPartitioning(Partitioning.builder().addIntField("number").build())
            .setBasePath(name)
            .setEnableExploreOnCreate(true)
            .setExploreSchema("key STRING, value INT")
            .setExploreFormat(format);
    // The delimiter property is only set when the caller supplies one.
    if (delim != null) {
        propertiesBuilder.setExploreFormatProperty("delimiter", delim);
    }
    datasetFramework.addInstance("partitionedFileSet", textDatasetId, propertiesBuilder.build());

    // The Hive table for the file set must now be listed.
    runCommand(
        NAMESPACE_ID,
        "show tables",
        true,
        Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTable))));

    // Obtain the dataset instance so that data can be written through it.
    PartitionedFileSet partitionedFileSet =
        datasetFramework.getDataset(textDatasetId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitionedFileSet);
    FileSet embeddedFileSet = partitionedFileSet.getEmbeddedFileSet();

    // Add one partition. Hive expects a partition to be a directory, so the partition is a
    // directory containing a single file.
    Location dataFile = embeddedFileSet.getLocation("file1/nn");
    FileWriterHelper.generateTextFile(dataFile.getOutputStream(), fileDelim, "x", 1, 2);
    PartitionKey numberOne = PartitionKey.builder().addIntField("number", 1).build();
    addPartition(partitionedFileSet, numberOne, "file1");

    // The partition must be visible in Hive.
    runCommand(
        NAMESPACE_ID,
        "show partitions " + hiveTable,
        true,
        Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("number=1"))));

    // The written key-value must be queryable, together with the partition column.
    runCommand(
        NAMESPACE_ID,
        "SELECT * FROM " + hiveTable + " ORDER BY key",
        true,
        Lists.newArrayList(
            new ColumnDesc(hiveTable + ".key", "STRING", 1, null),
            new ColumnDesc(hiveTable + ".value", "INT", 2, null),
            new ColumnDesc(hiveTable + ".number", "INT", 3, null)),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));

    // Drop the partition, then the dataset itself.
    dropPartition(partitionedFileSet, numberOne);
    datasetFramework.deleteInstance(textDatasetId);

    // The Hive table must be gone.
    runCommand(
        NAMESPACE_ID,
        "show tables",
        false,
        Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
        Collections.<QueryResult>emptyList());
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSetProperties(io.cdap.cdap.api.dataset.lib.PartitionedFileSetProperties) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location)

Example 5 with QueryResult

use of io.cdap.cdap.proto.QueryResult in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedExisting.

/**
 * Creates a second partitioned file set on top of a Hive table ("reuse") that a first, dummy
 * dataset has already created and populated, then checks what updating and deleting the second
 * dataset does to that table.
 *
 * @param reuseProperty name of the dataset property that is set to "true" on the second dataset
 * @param possessed selects the expected outcome: when true, the table is expected to pick up the
 *     updated schema and to be gone once the second dataset is deleted; when false, the table and
 *     its data are expected to stay untouched (NOTE(review): presumably this reflects whether the
 *     second dataset takes possession of the existing table - confirm against the callers)
 * @throws Exception if any dataset operation or query assertion fails
 */
private void testPartitionedExisting(String reuseProperty, boolean possessed) throws Exception {
    final DatasetId dummyInstanceId = NAMESPACE_ID.dataset("dummy");
    final DatasetId existingId = NAMESPACE_ID.dataset("tpExisting");
    File baseDir = new File(tmpFolder.newFolder(), "base");
    String tableName = "reuse";

    // Create a partitioned file set so that the Hive table exists, and add one partition to it.
    DatasetProperties dummyProps = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(baseDir.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), dummyInstanceId, dummyProps);
    PartitionedFileSet dummyDataset =
        datasetFramework.getDataset(dummyInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(dummyDataset);
    Location dataFile = dummyDataset.getEmbeddedFileSet().getLocation("number1").append("file1");
    PartitionKey numberOne = PartitionKey.builder().addIntField("number", 1).build();
    FileWriterHelper.generateTextFile(dataFile.getOutputStream(), ",", "x", 1, 2);
    addPartition(dummyDataset, numberOne, "number1");

    // Validate the data written through the dummy dataset.
    List<ColumnDesc> originalColumns = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".value", "INT", 2, null),
        new ColumnDesc(tableName + ".number", "INT", 3, null));
    runCommand(
        NAMESPACE_ID,
        "SELECT * FROM " + tableName,
        true,
        originalColumns,
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));

    // Create the second dataset over the same base path and table name, with the reuse property set.
    DatasetProperties reuseProps = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(baseDir.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), existingId, reuseProps);
    PartitionedFileSet reusingDataset =
        datasetFramework.getDataset(existingId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(reusingDataset);

    // Update the second dataset with renamed schema columns (k, v instead of key, value).
    DatasetProperties renamedProps = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(baseDir.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("k STRING, v INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.updateInstance(existingId, renamedProps);

    // Validate the table after the update.
    if (possessed) {
        // The renamed columns are expected; no expected rows are passed for this query.
        List<ColumnDesc> renamedColumns = Lists.newArrayList(
            new ColumnDesc(tableName + ".k", "STRING", 1, null),
            new ColumnDesc(tableName + ".v", "INT", 2, null),
            new ColumnDesc(tableName + ".number", "INT", 3, null));
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, renamedColumns, null);
    } else {
        // The original schema and data are expected to be unchanged.
        runCommand(
            NAMESPACE_ID,
            "SELECT * FROM " + tableName,
            true,
            originalColumns,
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    }

    // Delete the second dataset and validate the table again.
    datasetFramework.deleteInstance(existingId);
    if (possessed) {
        // No tables are expected to remain.
        runCommand(NAMESPACE_ID, "SHOW tables", false, null, Collections.<QueryResult>emptyList());
    } else {
        // The table and its data are expected to still be there.
        runCommand(
            NAMESPACE_ID,
            "SELECT * FROM " + tableName,
            true,
            originalColumns,
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    }
    datasetFramework.deleteInstance(dummyInstanceId);
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) File(java.io.File) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location)

Aggregations

QueryResult (io.cdap.cdap.proto.QueryResult)37 ColumnDesc (io.cdap.cdap.proto.ColumnDesc)23 DatasetId (io.cdap.cdap.proto.id.DatasetId)18 Test (org.junit.Test)18 TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet)9 ExploreExecutionResult (io.cdap.cdap.explore.client.ExploreExecutionResult)9 Location (org.apache.twill.filesystem.Location)8 PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet)7 ImmutableList (com.google.common.collect.ImmutableList)6 FileSet (io.cdap.cdap.api.dataset.lib.FileSet)6 SQLException (java.sql.SQLException)5 Schema (io.cdap.cdap.api.data.schema.Schema)4 Table (io.cdap.cdap.api.dataset.table.Table)4 HandleNotFoundException (io.cdap.cdap.explore.service.HandleNotFoundException)4 PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey)3 PartitionedFileSetProperties (io.cdap.cdap.api.dataset.lib.PartitionedFileSetProperties)3 ExploreClient (io.cdap.cdap.explore.client.ExploreClient)3 MockExploreClient (io.cdap.cdap.explore.client.MockExploreClient)3 QueryHandle (io.cdap.cdap.proto.QueryHandle)3 QueryStatus (io.cdap.cdap.proto.QueryStatus)3