Use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.
Class HiveExploreServiceFileSetTestRun, method testPartitionedExisting.
private void testPartitionedExisting(String reuseProperty, boolean possessed) throws Exception {
  final DatasetId dummyInstanceId = NAMESPACE_ID.dataset("dummy");
  final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("tpExisting");
  File path = new File(tmpFolder.newFolder(), "base");
  String tableName = "reuse";
  // create a PFS in order to create a table in Hive and add a partition
  // create a partitioned file set with a single int partition field
  DatasetProperties props = PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addIntField("number").build())
    .setBasePath(path.toString())
    .setEnableExploreOnCreate(true)
    .setExploreTableName(tableName)
    .setExploreSchema("key STRING, value INT")
    .setExploreFormat("csv")
    .build();
  datasetFramework.addInstance(PartitionedFileSet.class.getName(), dummyInstanceId, props);
  PartitionedFileSet dummy = datasetFramework.getDataset(dummyInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(dummy);
  Location location = dummy.getEmbeddedFileSet().getLocation("number1").append("file1");
  PartitionKey key = PartitionKey.builder().addIntField("number", 1).build();
  FileWriterHelper.generateTextFile(location.getOutputStream(), ",", "x", 1, 2);
  addPartition(dummy, key, "number1");
  // validate data
  List<ColumnDesc> expectedColumns = Lists.newArrayList(
    new ColumnDesc(tableName + ".key", "STRING", 1, null),
    new ColumnDesc(tableName + ".value", "INT", 2, null),
    new ColumnDesc(tableName + ".number", "INT", 3, null));
  runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
  // create a second PFS instance that reuses the existing table
  props = PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addIntField("number").build())
    .setBasePath(path.toString())
    .setEnableExploreOnCreate(true)
    .setExploreTableName(tableName)
    .setExploreSchema("key STRING, value INT")
    .setExploreFormat("csv")
    .add(reuseProperty, "true")
    .build();
  datasetFramework.addInstance(PartitionedFileSet.class.getName(), datasetInstanceId, props);
  PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(partitioned);
  // update the new instance with a different explore schema
  props = PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addIntField("number").build())
    .setBasePath(path.toString())
    .setEnableExploreOnCreate(true)
    .setExploreTableName(tableName)
    .setExploreSchema("k STRING, v INT")
    .setExploreFormat("csv")
    .add(reuseProperty, "true")
    .build();
  datasetFramework.updateInstance(datasetInstanceId, props);
  // validate data
  if (!possessed) {
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
               Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
  } else {
    List<ColumnDesc> newExpectedColumns = Lists.newArrayList(
      new ColumnDesc(tableName + ".k", "STRING", 1, null),
      new ColumnDesc(tableName + ".v", "INT", 2, null),
      new ColumnDesc(tableName + ".number", "INT", 3, null));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, newExpectedColumns, null);
  }
  datasetFramework.deleteInstance(datasetInstanceId);
  if (!possessed) {
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
               Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
  } else {
    runCommand(NAMESPACE_ID, "SHOW tables", false, null, Collections.<QueryResult>emptyList());
  }
  datasetFramework.deleteInstance(dummyInstanceId);
}
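In the expectations above, ColumnDesc carries the column name (prefixed with the table name for SELECT *), the Hive type, a 1-based position, and an optional comment. Below is a minimal sketch of a helper that derives such a list from an explore schema string; this helper is illustrative and not part of CDAP, and it assumes the simple "name TYPE, name TYPE" schema form and INT partition columns used in this test.

import io.cdap.cdap.proto.ColumnDesc;
import java.util.ArrayList;
import java.util.List;

public final class ExpectedColumns {
  // Builds the ColumnDesc list a "SELECT * FROM <table>" is expected to return:
  // the declared explore schema first, then the partition columns, all prefixed
  // with the table name as Hive does for SELECT *. (Hypothetical helper.)
  public static List<ColumnDesc> fromSchema(String tableName, String exploreSchema, String... partitionColumns) {
    List<ColumnDesc> columns = new ArrayList<>();
    int position = 1;
    for (String field : exploreSchema.split(",")) {
      String[] nameAndType = field.trim().split("\\s+");
      columns.add(new ColumnDesc(tableName + "." + nameAndType[0], nameAndType[1].toUpperCase(), position++, null));
    }
    for (String partition : partitionColumns) {
      // partition columns are assumed to be INT here, matching the "number" field above
      columns.add(new ColumnDesc(tableName + "." + partition, "INT", position++, null));
    }
    return columns;
  }
}

With the values used in the test, ExpectedColumns.fromSchema("reuse", "key STRING, value INT", "number") yields the same three descriptors as expectedColumns.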
Use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.
Class HiveExploreServiceFileSetTestRun, method testTimePartitionedFileSet.
@Test
public void testTimePartitionedFileSet() throws Exception {
  final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("parts");
  final String tableName = getDatasetHiveName(datasetInstanceId);
  // create a time partitioned file set
  datasetFramework.addInstance("timePartitionedFileSet", datasetInstanceId, FileSetProperties.builder()
    .setBasePath("somePath")
    .setEnableExploreOnCreate(true)
    .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
    .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
    .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
    .setTableProperty("avro.schema.literal", SCHEMA.toString())
    .build());
  // verify that the hive table was created for this file set
  runCommand(NAMESPACE_ID, "show tables", true,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
  // Accessing dataset instance to perform data operations
  TimePartitionedFileSet tpfs = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(tpfs);
  Assert.assertTrue(tpfs instanceof TransactionAware);
  // add some partitions. Beware that Hive expects a partition to be a directory, so we create dirs with one file
  long time1 = DATE_FORMAT.parse("12/10/14 1:00 am").getTime();
  long time2 = DATE_FORMAT.parse("12/10/14 2:00 am").getTime();
  long time3 = DATE_FORMAT.parse("12/10/14 3:00 am").getTime();
  Location location1 = tpfs.getEmbeddedFileSet().getLocation("file1/nn");
  Location location2 = tpfs.getEmbeddedFileSet().getLocation("file2/nn");
  Location location3 = tpfs.getEmbeddedFileSet().getLocation("file3/nn");
  FileWriterHelper.generateAvroFile(location1.getOutputStream(), "x", 1, 2);
  FileWriterHelper.generateAvroFile(location2.getOutputStream(), "y", 2, 3);
  FileWriterHelper.generateAvroFile(location3.getOutputStream(), "x", 3, 4);
  addTimePartition(tpfs, time1, "file1");
  addTimePartition(tpfs, time2, "file2");
  addTimePartition(tpfs, time3, "file3");
  // verify that the partitions were added to Hive
  runCommand(NAMESPACE_ID, "show partitions " + tableName, true,
             Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")),
             Lists.newArrayList(
               new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")),
               new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=2/minute=0")),
               new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));
  // verify that we can query the key-values in the file with Hive
  runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " ORDER BY key, value", true,
             Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)),
             Lists.newArrayList(
               new QueryResult(Lists.<Object>newArrayList("x1", "#1")),
               new QueryResult(Lists.<Object>newArrayList("x3", "#3")),
               new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));
  // verify that we can query the key-values in the file with Hive
  runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " WHERE hour = 2 ORDER BY key, value", true,
             Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("y2", "#2"))));
  // remove a partition
  dropTimePartition(tpfs, time2);
  // verify that we can query the key-values in the file with Hive
  runCommand(NAMESPACE_ID, "SELECT key, value FROM " + tableName + " ORDER BY key, value", true,
             Lists.newArrayList(new ColumnDesc("key", "STRING", 1, null), new ColumnDesc("value", "STRING", 2, null)),
             Lists.newArrayList(
               new QueryResult(Lists.<Object>newArrayList("x1", "#1")),
               new QueryResult(Lists.<Object>newArrayList("x3", "#3"))));
  // verify the partition was removed from Hive
  runCommand(NAMESPACE_ID, "show partitions " + tableName, true,
             Lists.newArrayList(new ColumnDesc("partition", "STRING", 1, "from deserializer")),
             Lists.newArrayList(
               new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=1/minute=0")),
               new QueryResult(Lists.<Object>newArrayList("year=2014/month=12/day=10/hour=3/minute=0"))));
  // drop the dataset
  datasetFramework.deleteInstance(datasetInstanceId);
  // verify the Hive table is gone
  runCommand(NAMESPACE_ID, "show tables", false,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             Collections.<QueryResult>emptyList());
  datasetFramework.addInstance("timePartitionedFileSet", datasetInstanceId, FileSetProperties.builder()
    .setBasePath("somePath")
    .setEnableExploreOnCreate(true)
    .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
    .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
    .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
    .setTableProperty("avro.schema.literal", SCHEMA.toString())
    .build());
  // verify that the hive table was created for this file set
  runCommand(NAMESPACE_ID, "show tables", true,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
}
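The expected "show partitions" rows above follow the year=/month=/day=/hour=/minute= convention that TimePartitionedFileSet uses for its Hive partitions. The sketch below only illustrates that mapping for a given timestamp; it is not CDAP's implementation, and the time zone handling is an assumption (the test's DATE_FORMAT parses in the JVM's default zone).

import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;

public final class TimePartitionPath {
  // Formats a timestamp the way the expected "show partitions" rows are written:
  // year=YYYY/month=M/day=D/hour=H/minute=M. Illustrative only; the actual mapping
  // in CDAP is performed by TimePartitionedFileSet itself.
  static String toPartitionSpec(long timeMillis, ZoneId zone) {
    ZonedDateTime t = Instant.ofEpochMilli(timeMillis).atZone(zone);
    return String.format("year=%d/month=%d/day=%d/hour=%d/minute=%d",
                         t.getYear(), t.getMonthValue(), t.getDayOfMonth(), t.getHour(), t.getMinute());
  }

  public static void main(String[] args) {
    ZonedDateTime time1 = ZonedDateTime.of(2014, 12, 10, 1, 0, 0, 0, ZoneId.systemDefault());
    // prints year=2014/month=12/day=10/hour=1/minute=0, matching the first expected partition row
    System.out.println(toPartitionSpec(time1.toInstant().toEpochMilli(), ZoneId.systemDefault()));
  }
}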
Use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.
Class InMemoryExploreServiceTest, method runNamespacedTest.
private void runNamespacedTest(String namespace) throws Exception {
  URL loadFileUrl = getClass().getResource("/test_table.dat");
  Assert.assertNotNull(loadFileUrl);
  // Should have no tables
  runCommand(namespace, "show tables", true,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             ImmutableList.<QueryResult>of());
  runCommand(namespace, "create table test (first INT, second STRING) ROW FORMAT "
               + "DELIMITED FIELDS TERMINATED BY '\\t'", false,
             ImmutableList.<ColumnDesc>of(), ImmutableList.<QueryResult>of());
  runCommand(namespace, "show tables", true,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("test"))));
  runCommand(namespace, "describe test", true,
             Lists.newArrayList(
               new ColumnDesc("col_name", "STRING", 1, "from deserializer"),
               new ColumnDesc("data_type", "STRING", 2, "from deserializer"),
               new ColumnDesc("comment", "STRING", 3, "from deserializer")),
             Lists.newArrayList(
               new QueryResult(Lists.<Object>newArrayList("first", "int", "")),
               new QueryResult(Lists.<Object>newArrayList("second", "string", ""))));
  // Should have no data
  runCommand(namespace, "select * from test", true,
             Lists.newArrayList(
               new ColumnDesc("test.first", "INT", 1, null),
               new ColumnDesc("test.second", "STRING", 2, null)),
             ImmutableList.<QueryResult>of());
  runCommand(namespace, "LOAD DATA LOCAL INPATH '" + new File(loadFileUrl.toURI()).getAbsolutePath()
               + "' INTO TABLE test", false,
             ImmutableList.<ColumnDesc>of(), ImmutableList.<QueryResult>of());
  runCommand(namespace, "select first, second from test", true,
             Lists.newArrayList(
               new ColumnDesc("first", "INT", 1, null),
               new ColumnDesc("second", "STRING", 2, null)),
             Lists.newArrayList(
               new QueryResult(Lists.<Object>newArrayList("1", "one")),
               new QueryResult(Lists.<Object>newArrayList("2", "two")),
               new QueryResult(Lists.<Object>newArrayList("3", "three")),
               new QueryResult(Lists.<Object>newArrayList("4", "four")),
               new QueryResult(Lists.<Object>newArrayList("5", "five"))));
  runCommand(namespace, "select * from test", true,
             Lists.newArrayList(
               new ColumnDesc("test.first", "INT", 1, null),
               new ColumnDesc("test.second", "STRING", 2, null)),
             Lists.newArrayList(
               new QueryResult(Lists.<Object>newArrayList("1", "one")),
               new QueryResult(Lists.<Object>newArrayList("2", "two")),
               new QueryResult(Lists.<Object>newArrayList("3", "three")),
               new QueryResult(Lists.<Object>newArrayList("4", "four")),
               new QueryResult(Lists.<Object>newArrayList("5", "five"))));
}
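The resource file test_table.dat is not shown here. Based on the DELIMITED FIELDS TERMINATED BY '\t' table definition and the expected SELECT results, it is presumably a tab-delimited file with five rows; the snippet below generates an equivalent file under that assumption (the contents are inferred from the expectations, not taken from the repository).

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;

public final class GenerateTestTable {
  // Writes a tab-delimited file with the five rows the assertions above imply
  // test_table.dat contains. Assumed contents, for illustration only.
  public static void main(String[] args) throws IOException {
    List<String> rows = Arrays.asList("1\tone", "2\ttwo", "3\tthree", "4\tfour", "5\tfive");
    Files.write(Paths.get("test_table.dat"), rows, StandardCharsets.UTF_8);
  }
}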
Use of io.cdap.cdap.proto.ColumnDesc in project cdap by caskdata.
Class StaticEmptyExploreResultSet, method getMetaData.
@Override
public ResultSetMetaData getMetaData() throws SQLException {
  if (isClosed()) {
    throw new SQLException("Resultset is closed");
  }
  ImmutableList.Builder<ColumnDesc> builder = ImmutableList.builder();
  for (int i = 0; i < schema.size(); i++) {
    ImmutablePair<String, String> pair = schema.get(i);
    builder.add(new ColumnDesc(pair.getFirst(), pair.getSecond(), i + 1, ""));
  }
  return new ExploreResultSetMetaData(builder.build());
}
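ExploreResultSetMetaData wraps the ColumnDesc list behind the standard java.sql.ResultSetMetaData interface, so callers read it the same way as any other JDBC driver's metadata. A small consumer sketch using only standard JDBC calls (the class and method names here are illustrative, not part of CDAP):

import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;

final class MetadataPrinter {
  // Walks the metadata built from the ColumnDesc list and prints each column's
  // JDBC-visible name and type name. Works against any ResultSet, including the
  // empty one returned by StaticEmptyExploreResultSet.
  static void printColumns(ResultSet resultSet) throws SQLException {
    ResultSetMetaData metaData = resultSet.getMetaData();
    for (int i = 1; i <= metaData.getColumnCount(); i++) {
      System.out.println(metaData.getColumnName(i) + " : " + metaData.getColumnTypeName(i));
    }
  }
}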
Use of io.cdap.cdap.proto.ColumnDesc in project cdap by caskdata.
Class ExploreResultSetTest, method testResultSet.
@Test
public void testResultSet() throws Exception {
  ExploreClient exploreClient = new MockExploreClient(
    ImmutableMap.of("mock_query", (List<ColumnDesc>) Lists.newArrayList(
      new ColumnDesc("column1", "STRING", 1, ""),
      new ColumnDesc("column2", "int", 2, ""),
      new ColumnDesc("column3", "char", 3, ""),
      new ColumnDesc("column4", "float", 4, ""),
      new ColumnDesc("column5", "double", 5, ""),
      new ColumnDesc("column6", "boolean", 6, ""),
      new ColumnDesc("column7", "tinyint", 7, ""),
      new ColumnDesc("column8", "smallint", 8, ""),
      new ColumnDesc("column9", "bigint", 9, ""),
      new ColumnDesc("column10", "date", 10, ""),
      new ColumnDesc("column11", "timestamp", 11, ""),
      new ColumnDesc("column12", "decimal", 12, ""),
      new ColumnDesc("column14", "map<string,string>", 13, ""),
      new ColumnDesc("column15", "array<string>", 14, ""),
      new ColumnDesc("column16", "struct<name:string,attr:string>", 15, ""))),
    ImmutableMap.of("mock_query", (List<QueryResult>) Lists.newArrayList(
      new QueryResult(ImmutableList.<Object>of(
        "value1", 1, "c", 0.1f, 0.2d, true, 0x1, (short) 2, (long) 10,
        "2014-06-20", "2014-06-20 07:37:00", "1000000000",
        "\"{\"key1\":\"value1\"}", "[\"a\",\"b\",\"c\"]", "{\"name\":\"first\",\"attr\":\"second\"}")))));
  ResultSet resultSet = new ExploreResultSet(
    exploreClient.submit(new NamespaceId(ns), "mock_query").get(),
    new ExploreStatement(null, exploreClient, ns), 0);
  Assert.assertTrue(resultSet.next());
  Assert.assertEquals(resultSet.getObject(1), resultSet.getObject("column1"));
  Assert.assertEquals("value1", resultSet.getString(1));
  Assert.assertEquals(1, resultSet.getInt(2));
  Assert.assertEquals("c", resultSet.getString(3));
  Assert.assertEquals(0.1f, resultSet.getFloat(4), 0.01);
  Assert.assertEquals(0.2d, resultSet.getDouble(5), 0.01);
  Assert.assertEquals(true, resultSet.getBoolean(6));
  Assert.assertEquals(0x1, resultSet.getByte(7));
  Assert.assertEquals(2, resultSet.getShort(8));
  Assert.assertEquals(10, resultSet.getLong(9));
  Assert.assertEquals(Date.valueOf("2014-06-20"), resultSet.getDate(10));
  Assert.assertEquals(Timestamp.valueOf("2014-06-20 07:37:00"), resultSet.getTimestamp(11));
  Assert.assertEquals(new BigDecimal("1000000000"), resultSet.getBigDecimal(12));
  Assert.assertEquals("\"{\"key1\":\"value1\"}", resultSet.getString(13));
  Assert.assertEquals("[\"a\",\"b\",\"c\"]", resultSet.getString(14));
  Assert.assertEquals("{\"name\":\"first\",\"attr\":\"second\"}", resultSet.getString(15));
  Assert.assertFalse(resultSet.next());
  Assert.assertFalse(resultSet.next());
  try {
    resultSet.getObject(1);
  } catch (SQLException e) {
    // Expected: no more rows
  }
}
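The mock above exercises most of the Hive type strings a ColumnDesc can carry, and the JDBC getters used in the assertions correspond to those types. As a rough illustration of that correspondence, here is a sketch of a Hive-type-to-java.sql.Types mapping; this is not CDAP's actual driver code, only an assumed example of the kind of translation such a driver performs.

import java.sql.Types;

final class HiveTypeToSqlType {
  // Illustrative mapping from the Hive type strings used in ColumnDesc above to
  // java.sql.Types constants. Hypothetical helper, not CDAP's implementation.
  static int toSqlType(String hiveType) {
    switch (hiveType.toLowerCase()) {
      case "string":    return Types.VARCHAR;
      case "int":       return Types.INTEGER;
      case "boolean":   return Types.BOOLEAN;
      case "float":     return Types.FLOAT;
      case "double":    return Types.DOUBLE;
      case "tinyint":   return Types.TINYINT;
      case "smallint":  return Types.SMALLINT;
      case "bigint":    return Types.BIGINT;
      case "date":      return Types.DATE;
      case "timestamp": return Types.TIMESTAMP;
      case "decimal":   return Types.DECIMAL;
      default:          return Types.OTHER;  // maps, arrays, structs, etc.
    }
  }
}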