
Example 36 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.

the class HiveExploreServiceFileSetTestRun method testPartitionedExisting.

private void testPartitionedExisting(String reuseProperty, boolean possessed) throws Exception {
    final DatasetId dummyInstanceId = NAMESPACE_ID.dataset("dummy");
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("tpExisting");
    File path = new File(tmpFolder.newFolder(), "base");
    String tableName = "reuse";
    // create a PFS in order to create a table in Hive and add a partition
    // (a partitioned file set with a single int partition field)
    DatasetProperties props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), dummyInstanceId, props);
    PartitionedFileSet dummy = datasetFramework.getDataset(dummyInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(dummy);
    Location location = dummy.getEmbeddedFileSet().getLocation("number1").append("file1");
    PartitionKey key = PartitionKey.builder().addIntField("number", 1).build();
    FileWriterHelper.generateTextFile(location.getOutputStream(), ",", "x", 1, 2);
    addPartition(dummy, key, "number1");
    // validate data
    List<ColumnDesc> expectedColumns = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".value", "INT", 2, null),
        new ColumnDesc(tableName + ".number", "INT", 3, null));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), datasetInstanceId, props);
    PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("k STRING, v INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.updateInstance(datasetInstanceId, props);
    // validate data
    if (!possessed) {
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    } else {
        List<ColumnDesc> newExpectedColumns = Lists.newArrayList(
            new ColumnDesc(tableName + ".k", "STRING", 1, null),
            new ColumnDesc(tableName + ".v", "INT", 2, null),
            new ColumnDesc(tableName + ".number", "INT", 3, null));
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, newExpectedColumns, null);
    }
    datasetFramework.deleteInstance(datasetInstanceId);
    if (!possessed) {
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    } else {
        runCommand(NAMESPACE_ID, "SHOW tables", false, null, Collections.<QueryResult>emptyList());
    }
    datasetFramework.deleteInstance(dummyInstanceId);
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) File(java.io.File) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location)
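All of these assertions build ColumnDesc the same way: ColumnDesc(name, type, position, comment), where positions are 1-based and the comment is null for regular data columns. As a minimal sketch (a hypothetical helper, not part of the CDAP test suite, assuming java.util.List and java.util.ArrayList are imported), the repeated expected-column lists could be generated like this:

    // Hypothetical helper: builds the expected columns for "SELECT * FROM <table>",
    // where Hive qualifies each column name with the table name.
    private static List<ColumnDesc> expectColumns(String tableName, String... nameTypePairs) {
        List<ColumnDesc> columns = new ArrayList<>();
        for (int i = 0; i < nameTypePairs.length; i += 2) {
            // the position argument of ColumnDesc is 1-based
            columns.add(new ColumnDesc(tableName + "." + nameTypePairs[i], nameTypePairs[i + 1], i / 2 + 1, null));
        }
        return columns;
    }

With it, the expectedColumns list above becomes expectColumns(tableName, "key", "STRING", "value", "INT", "number", "INT").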

Example 37 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.

the class HiveExploreServiceFileSetTestRun method testTimePartitionedFileSet.

@Test
public void testTimePartitionedFileSet() throws Exception {
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("parts");
    final String tableName = getDatasetHiveName(datasetInstanceId);
    // create a time partitioned file set
    datasetFramework.addInstance("timePartitionedFileSet", datasetInstanceId, FileSetProperties.builder().setBasePath("somePath").setEnableExploreOnCreate(true).setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe").setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat").setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat").setTableProperty("avro.schema.literal", SCHEMA.toString()).build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, "show tables", true, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
    // Accessing dataset instance to perform data operations
    TimePartitionedFileSet tpfs = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(tpfs);
    Assert.assertTrue(tpfs instanceof TransactionAware);
    // add some partitions. Beware that Hive expects a partition to be a directory, so we create dirs with one file
    long time1 = DATE_FORMAT.parse("12/10/14 1:00 am").getTime();
    long time2 = DATE_FORMAT.parse("12/10/14 2:00 am").getTime();
    long time3 = DATE_FORMAT.parse("12/10/14 3:00 am").getTime();
    Location location1 = tpfs.getEmbeddedFileSet().getLocation("file1/nn");
    Location location2 = tpfs.getEmbeddedFileSet().getLocation("file2/nn");
    Location location3 = tpfs.getEmbeddedFileSet().getLocation("file3/nn");
    FileWriterHelper.generateAvroFile(location1.getOutputStream(), "x", 1, 2);
    FileWriterHelper.generateAvroFile(location2.getOutputStream(), "y", 2, 3);
    FileWriterHelper.generateAvroFile(location3.getOutputStream(), "x", 3, 4);
    addTimePartition(tpfs, time1, "file1");
    addTimePartition(tpfs, time2, "file2");
    addTimePartition(tpfs, time3, "file3");
    // verify that the partitions were added to Hive
    runCommand(NAMESPACE_ID, "show partitions " + tableName, true, Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")), new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=2/minute=0")), new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " ORDER BY key, value", true, Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", "#1")), new QueryResult(Lists.<Object>newArrayList("x3", "#3")), new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));
    // verify that we can filter by the partition column (hour) with Hive
    runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " WHERE hour = 2 ORDER BY key, value", true,
        Lists.newArrayList(
            new ColumnDesc("key", "STRING", 1, null),
            new ColumnDesc("value", "STRING", 2, null)),
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));
    // remove a partition
    dropTimePartition(tpfs, time2);
    // verify that we can query the key-values in the file with Hive
    runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " ORDER BY key, value", true, Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", "#1")), new QueryResult(Lists.<Object>newArrayList("x3", "#3"))));
    // verify the partition was removed from Hive
    runCommand(NAMESPACE_ID, "show partitions " + tableName, true, Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")), new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));
    // drop the dataset
    datasetFramework.deleteInstance(datasetInstanceId);
    // verify the Hive table is gone
    runCommand(NAMESPACE_ID, "show tables", false, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Collections.<QueryResult>emptyList());
    datasetFramework.addInstance("timePartitionedFileSet", datasetInstanceId, FileSetProperties.builder().setBasePath("somePath").setEnableExploreOnCreate(true).setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe").setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat").setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat").setTableProperty("avro.schema.literal", SCHEMA.toString()).build());
    // verify that the hive table was created for this file set
    runCommand(NAMESPACE_ID, "show tables", true, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) TransactionAware(org.apache.tephra.TransactionAware) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
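The expected partition strings above follow Hive's year=/month=/day=/hour=/minute= layout. A rough sketch (illustrative only, assuming java.util.Calendar and the same default time zone that DATE_FORMAT uses) of deriving that spec from a timestamp:

    // Illustrative only: derives the Hive partition spec TPFS produces for a timestamp.
    private static String hivePartitionSpec(long time) {
        Calendar cal = Calendar.getInstance();
        cal.setTimeInMillis(time);
        // Calendar.MONTH is 0-based, hence the + 1
        return String.format("year=%d/month=%d/day=%d/hour=%d/minute=%d",
            cal.get(Calendar.YEAR), cal.get(Calendar.MONTH) + 1, cal.get(Calendar.DAY_OF_MONTH),
            cal.get(Calendar.HOUR_OF_DAY), cal.get(Calendar.MINUTE));
    }

For time1 ("12/10/14 1:00 am") this yields "year=2014/month=12/day=10/hour=1/minute=0", matching the first expected row.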

Example 38 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.

the class InMemoryExploreServiceTest method runNamespacedTest.

private void runNamespacedTest(String namespace) throws Exception {
    URL loadFileUrl = getClass().getResource("/test_table.dat");
    Assert.assertNotNull(loadFileUrl);
    // Should have no tables
    runCommand(namespace, "show tables", true, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), ImmutableList.<QueryResult>of());
    runCommand(namespace, "create table test (first INT, second STRING) ROW FORMAT " + "DELIMITED FIELDS TERMINATED BY '\\t'", false, ImmutableList.<ColumnDesc>of(), ImmutableList.<QueryResult>of());
    runCommand(namespace, "show tables", true, Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("test"))));
    runCommand(namespace, "describe test", true, Lists.newArrayList(new ColumnDesc("col_name", "STRING", 1, "from deserializer"), new ColumnDesc("data_type", "STRING", 2, "from deserializer"), new ColumnDesc("comment", "STRING", 3, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("first", "int", "")), new QueryResult(Lists.<Object>newArrayList("second", "string", ""))));
    // Should have no data
    runCommand(namespace, "select * from test", true, Lists.newArrayList(new ColumnDesc("test.first", "INT", 1, null), new ColumnDesc("test.second", "STRING", 2, null)), ImmutableList.<QueryResult>of());
    runCommand(namespace, "LOAD DATA LOCAL INPATH '" + new File(loadFileUrl.toURI()).getAbsolutePath() + "' INTO TABLE test", false, ImmutableList.<ColumnDesc>of(), ImmutableList.<QueryResult>of());
    runCommand(namespace, "select first, second from test", true, Lists.newArrayList(new ColumnDesc("first", "INT", 1, null), new ColumnDesc("second", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("1", "one")), new QueryResult(Lists.<Object>newArrayList("2", "two")), new QueryResult(Lists.<Object>newArrayList("3", "three")), new QueryResult(Lists.<Object>newArrayList("4", "four")), new QueryResult(Lists.<Object>newArrayList("5", "five"))));
    runCommand(namespace, "select * from test", true, Lists.newArrayList(new ColumnDesc("test.first", "INT", 1, null), new ColumnDesc("test.second", "STRING", 2, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("1", "one")), new QueryResult(Lists.<Object>newArrayList("2", "two")), new QueryResult(Lists.<Object>newArrayList("3", "three")), new QueryResult(Lists.<Object>newArrayList("4", "four")), new QueryResult(Lists.<Object>newArrayList("5", "five"))));
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) File(java.io.File) URL(java.net.URL)
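For reference, the assertions imply that the /test_table.dat fixture contains five tab-delimited rows along these lines (reconstructed from the expected results above, not copied from the actual file):

    1	one
    2	two
    3	three
    4	four
    5	five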

Example 39 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by caskdata.

the class StaticEmptyExploreResultSet method getMetaData.

@Override
public ResultSetMetaData getMetaData() throws SQLException {
    if (isClosed()) {
        throw new SQLException("Resultset is closed");
    }
    ImmutableList.Builder<ColumnDesc> builder = ImmutableList.builder();
    for (int i = 0; i < schema.size(); i++) {
        ImmutablePair<String, String> pair = schema.get(i);
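        // ColumnDesc positions are 1-based, hence i + 1 below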
        builder.add(new ColumnDesc(pair.getFirst(), pair.getSecond(), i + 1, ""));
    }
    return new ExploreResultSetMetaData(builder.build());
}
Also used : SQLException(java.sql.SQLException) ImmutableList(com.google.common.collect.ImmutableList) ColumnDesc(io.cdap.cdap.proto.ColumnDesc)
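Since getMetaData() returns a standard java.sql.ResultSetMetaData, the ColumnDesc entries built above surface through the usual JDBC getters. An illustrative usage sketch (assuming java.sql imports; the method name is hypothetical):

    // Illustrative only: prints each column's name and type via the standard JDBC metadata API.
    static void printColumns(ResultSet rs) throws SQLException {
        ResultSetMetaData meta = rs.getMetaData();
        for (int i = 1; i <= meta.getColumnCount(); i++) {
            // JDBC column indices are 1-based
            System.out.println(meta.getColumnName(i) + " : " + meta.getColumnTypeName(i));
        }
    }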

Example 40 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by caskdata.

the class ExploreResultSetTest method testResultSet.

@Test
public void testResultSet() throws Exception {
    ExploreClient exploreClient = new MockExploreClient(
        ImmutableMap.of("mock_query", (List<ColumnDesc>) Lists.newArrayList(
            new ColumnDesc("column1", "STRING", 1, ""),
            new ColumnDesc("column2", "int", 2, ""),
            new ColumnDesc("column3", "char", 3, ""),
            new ColumnDesc("column4", "float", 4, ""),
            new ColumnDesc("column5", "double", 5, ""),
            new ColumnDesc("column6", "boolean", 6, ""),
            new ColumnDesc("column7", "tinyint", 7, ""),
            new ColumnDesc("column8", "smallint", 8, ""),
            new ColumnDesc("column9", "bigint", 9, ""),
            new ColumnDesc("column10", "date", 10, ""),
            new ColumnDesc("column11", "timestamp", 11, ""),
            new ColumnDesc("column12", "decimal", 12, ""),
            new ColumnDesc("column14", "map<string,string>", 13, ""),
            new ColumnDesc("column15", "array<string>", 14, ""),
            new ColumnDesc("column16", "struct<name:string,attr:string>", 15, ""))),
        ImmutableMap.of("mock_query", (List<QueryResult>) Lists.newArrayList(
            new QueryResult(ImmutableList.<Object>of(
                "value1", 1, "c", 0.1f, 0.2d, true, 0x1, (short) 2, (long) 10,
                "2014-06-20", "2014-06-20 07:37:00", "1000000000",
                "\"{\"key1\":\"value1\"}", "[\"a\",\"b\",\"c\"]",
                "{\"name\":\"first\",\"attr\":\"second\"}")))));
    ResultSet resultSet = new ExploreResultSet(
        exploreClient.submit(new NamespaceId(ns), "mock_query").get(),
        new ExploreStatement(null, exploreClient, ns), 0);
    Assert.assertTrue(resultSet.next());
    Assert.assertEquals(resultSet.getObject(1), resultSet.getObject("column1"));
    Assert.assertEquals("value1", resultSet.getString(1));
    Assert.assertEquals(1, resultSet.getInt(2));
    Assert.assertEquals("c", resultSet.getString(3));
    Assert.assertEquals(0.1f, resultSet.getFloat(4), 0.01);
    Assert.assertEquals(0.2d, resultSet.getDouble(5), 0.01);
    Assert.assertEquals(true, resultSet.getBoolean(6));
    Assert.assertEquals(0x1, resultSet.getByte(7));
    Assert.assertEquals(2, resultSet.getShort(8));
    Assert.assertEquals(10, resultSet.getLong(9));
    Assert.assertEquals(Date.valueOf("2014-06-20"), resultSet.getDate(10));
    Assert.assertEquals(Timestamp.valueOf("2014-06-20 07:37:00"), resultSet.getTimestamp(11));
    Assert.assertEquals(new BigDecimal("1000000000"), resultSet.getBigDecimal(12));
    Assert.assertEquals("\"{\"key1\":\"value1\"}", resultSet.getString(13));
    Assert.assertEquals("[\"a\",\"b\",\"c\"]", resultSet.getString(14));
    Assert.assertEquals("{\"name\":\"first\",\"attr\":\"second\"}", resultSet.getString(15));
    Assert.assertFalse(resultSet.next());
    Assert.assertFalse(resultSet.next());
    try {
        resultSet.getObject(1);
        Assert.fail("getObject should have thrown SQLException: no more rows");
    } catch (SQLException e) {
    // Expected: no more rows
    }
}
Also used : ExploreClient(io.cdap.cdap.explore.client.ExploreClient) MockExploreClient(io.cdap.cdap.explore.client.MockExploreClient) QueryResult(io.cdap.cdap.proto.QueryResult) SQLException(java.sql.SQLException) MockExploreClient(io.cdap.cdap.explore.client.MockExploreClient) ResultSet(java.sql.ResultSet) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) BigDecimal(java.math.BigDecimal) Test(org.junit.Test)
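As a usage-side sketch (illustrative only, the method name is hypothetical), the same result set can be drained generically through the java.sql interfaces; once next() returns false, the getters throw SQLException, which is what the try/catch above expects:

    // Illustrative only: dumps every row using the generic getObject accessor.
    static void dumpRows(ResultSet rs) throws SQLException {
        int columnCount = rs.getMetaData().getColumnCount();
        while (rs.next()) {
            StringBuilder row = new StringBuilder();
            for (int i = 1; i <= columnCount; i++) {
                if (i > 1) {
                    row.append('\t');
                }
                row.append(rs.getObject(i));
            }
            System.out.println(row);
        }
    }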

Aggregations

ColumnDesc (io.cdap.cdap.proto.ColumnDesc): 72
QueryResult (io.cdap.cdap.proto.QueryResult): 46
Test (org.junit.Test): 42
DatasetId (io.cdap.cdap.proto.id.DatasetId): 32
ExploreExecutionResult (io.cdap.cdap.explore.client.ExploreExecutionResult): 24
TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet): 18
Location (org.apache.twill.filesystem.Location): 16
PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet): 14
SQLException (java.sql.SQLException): 14
FileSet (io.cdap.cdap.api.dataset.lib.FileSet): 12
Schema (io.cdap.cdap.api.data.schema.Schema): 8
Table (io.cdap.cdap.api.dataset.table.Table): 8
ExploreClient (io.cdap.cdap.explore.client.ExploreClient): 8
MockExploreClient (io.cdap.cdap.explore.client.MockExploreClient): 8
QueryStatus (io.cdap.cdap.proto.QueryStatus): 8
ResultSet (java.sql.ResultSet): 8
ImmutableList (com.google.common.collect.ImmutableList): 6
PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey): 6
PartitionedFileSetProperties (io.cdap.cdap.api.dataset.lib.PartitionedFileSetProperties): 6
QueryHandle (io.cdap.cdap.proto.QueryHandle): 6