Example use of co.cask.cdap.proto.ColumnDesc in the project cdap by caskdata.
From the class GenerateClientUsageExample, method queryClient:
/**
 * Runs an ad-hoc query through a {@link QueryClient}, waits for it to finish and
 * assembles a {@code "name: type"} header entry for every column of the result schema.
 *
 * @throws Exception if executing the query or waiting for its result fails
 */
public void queryClient() throws Exception {
  // Client used to interact with CDAP
  QueryClient client = new QueryClient(clientConfig);

  // Submit an ad-hoc query against the Purchase example and block until it completes
  String sql = "SELECT * FROM dataset_history WHERE customer IN ('Alice','Bob')";
  ListenableFuture<ExploreExecutionResult> pending = client.execute(NamespaceId.DEFAULT, sql);
  ExploreExecutionResult executionResult = pending.get();

  // Describe each schema column in the header slot matching its position
  List<ColumnDesc> columns = executionResult.getResultSchema();
  String[] header = new String[columns.size()];
  for (ColumnDesc column : columns) {
    // Hive column positions start at 1, array slots at 0
    header[column.getPosition() - 1] = column.getName() + ": " + column.getType();
  }
}
Example use of co.cask.cdap.proto.ColumnDesc in the project cdap by caskdata.
From the class ExploreStatementTest, method executeTest:
/**
 * Checks that every execute call on an {@link ExploreStatement} hands back an open
 * {@link ResultSet} with no rows, and that a subsequent execute call closes the
 * result set returned by the previous one.
 */
@Test
public void executeTest() throws Exception {
  List<ColumnDesc> columns = Lists.newArrayList(new ColumnDesc("column1", "STRING", 1, ""));
  List<QueryResult> noRows = Lists.newArrayList();

  // All four mock queries share the same single-column schema and the same empty result list
  ImmutableMap<String, List<ColumnDesc>> schemasByQuery = ImmutableMap.of(
    "mock_query_1", columns,
    "mock_query_2", columns,
    "mock_query_3", columns,
    "mock_query_4", columns);
  ImmutableMap<String, List<QueryResult>> rowsByQuery = ImmutableMap.of(
    "mock_query_1", noRows,
    "mock_query_2", noRows,
    "mock_query_3", noRows,
    "mock_query_4", noRows);
  ExploreClient exploreClient = new MockExploreClient(schemasByQuery, rowsByQuery);

  ExploreStatement statement = new ExploreStatement(null, exploreClient, "ns1");

  // A query without rows must still expose an open ResultSet via execute()/getResultSet()
  Assert.assertTrue(statement.execute("mock_query_1"));
  ResultSet viaExecute = statement.getResultSet();
  Assert.assertNotNull(viaExecute);
  Assert.assertFalse(viaExecute.isClosed());
  Assert.assertFalse(viaExecute.next());

  // ... and the same holds for executeQuery()
  ResultSet secondResults = statement.executeQuery("mock_query_2");
  Assert.assertNotNull(secondResults);
  Assert.assertFalse(secondResults.isClosed());
  Assert.assertFalse(secondResults.next());

  // Running another query through executeQuery() closes the previous results
  ResultSet thirdResults = statement.executeQuery("mock_query_3");
  Assert.assertTrue(secondResults.isClosed());
  Assert.assertNotNull(thirdResults);
  Assert.assertFalse(thirdResults.isClosed());
  Assert.assertFalse(thirdResults.next());

  // Running another query through execute() closes the previous results as well
  Assert.assertTrue(statement.execute("mock_query_4"));
  Assert.assertTrue(thirdResults.isClosed());
}
Example use of co.cask.cdap.proto.ColumnDesc in the project cdap by caskdata.
From the class StaticEmptyExploreResultSet, method getMetaData:
/**
 * Builds the metadata of this result set from its {@code schema} entries.
 *
 * @return metadata wrapping one {@link ColumnDesc} per schema entry, with positions counted from 1
 * @throws SQLException if this result set is closed
 */
@Override
public ResultSetMetaData getMetaData() throws SQLException {
  if (isClosed()) {
    throw new SQLException("Resultset is closed");
  }

  ImmutableList.Builder<ColumnDesc> columns = ImmutableList.builder();
  // Walk the schema by 1-based column position; the list index is position - 1
  for (int position = 1; position <= schema.size(); position++) {
    ImmutablePair<String, String> entry = schema.get(position - 1);
    ColumnDesc column = new ColumnDesc(entry.getFirst(), entry.getSecond(), position, "");
    columns.add(column);
  }

  List<ColumnDesc> columnDescs = columns.build();
  return new ExploreResultSetMetaData(columnDescs);
}
Example use of co.cask.cdap.proto.ColumnDesc in the project cdap by caskdata.
From the class HiveExploreServiceFileSetTestRun, method testPartitionedAvroSchemaUpdate:
/**
 * Checks that updating the {@code avro.schema.literal} table property of an explorable
 * partitioned file set changes the columns returned when querying an existing partition.
 */
@Test
public void testPartitionedAvroSchemaUpdate() throws Exception {
  DatasetId datasetId = NAMESPACE_ID.dataset("avroupd");
  String tableName = getDatasetHiveName(datasetId);
  // the same query is issued before and after the schema update
  String selectPartition = "SELECT * FROM " + tableName + " WHERE number=4";

  // create a file set partitioned by the int field "number", explorable as Avro using SCHEMA
  datasetFramework.addInstance(
    PartitionedFileSet.class.getName(),
    datasetId,
    PartitionedFileSetProperties.builder()
      .setPartitioning(Partitioning.builder().addIntField("number").build())
      .setEnableExploreOnCreate(true)
      .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
      .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
      .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
      .setTableProperty("avro.schema.literal", SCHEMA.toString())
      .build());

  // obtain the dataset instance so that data can be written to it
  PartitionedFileSet partitioned = datasetFramework.getDataset(datasetId, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(partitioned);
  FileSet embedded = partitioned.getEmbeddedFileSet();

  // write an Avro file and register its directory as the partition number=4
  Location avroFile = embedded.getLocation("file4/nn");
  FileWriterHelper.generateAvroFile(avroFile.getOutputStream(), "x", 4, 5);
  addPartition(partitioned, PartitionKey.builder().addIntField("number", 4).build(), "file4");

  // with the original schema the query returns key, value and the partition field
  List<ColumnDesc> columnsBeforeUpdate = Lists.newArrayList(
    new ColumnDesc(tableName + ".key", "STRING", 1, null),
    new ColumnDesc(tableName + ".value", "STRING", 2, null),
    new ColumnDesc(tableName + ".number", "INT", 3, null));
  // avro file has key=x4, value=#4
  List<QueryResult> rowsBeforeUpdate = Lists.newArrayList(
    new QueryResult(Lists.<Object>newArrayList("x4", "#4", 4)));
  runCommand(NAMESPACE_ID, selectPartition, true, columnsBeforeUpdate, rowsBeforeUpdate);

  // update the partitioned file set: same properties, but K_SCHEMA as the Avro schema
  datasetFramework.updateInstance(
    datasetId,
    PartitionedFileSetProperties.builder()
      .setPartitioning(Partitioning.builder().addIntField("number").build())
      .setEnableExploreOnCreate(true)
      .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
      .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
      .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
      .setTableProperty("avro.schema.literal", K_SCHEMA.toString())
      .build());

  // after the update only key and the partition field are expected for the same row
  List<ColumnDesc> columnsAfterUpdate = Lists.newArrayList(
    new ColumnDesc(tableName + ".key", "STRING", 1, null),
    new ColumnDesc(tableName + ".number", "INT", 2, null));
  List<QueryResult> rowsAfterUpdate = Lists.newArrayList(
    new QueryResult(Lists.<Object>newArrayList("x4", 4)));
  runCommand(NAMESPACE_ID, selectPartition, true, columnsAfterUpdate, rowsAfterUpdate);
}
Example use of co.cask.cdap.proto.ColumnDesc in the project cdap by caskdata.
From the class HiveExploreServiceFileSetTestRun, method testCreateAddAlterDrop:
/**
 * Exercises an explorable Avro file set end to end: create it, add files and query them,
 * disable and re-enable explore, change the explore schema, and finally delete the dataset.
 *
 * @param dbName explore database name to set in the dataset properties; when non-null the
 *               database is created up front, used to qualify queries, and dropped at the end
 * @param tableName explore table name to set in the dataset properties; when null the name
 *                  returned by {@code getDatasetHiveName} is used
 * @throws Exception if any dataset operation or query fails
 */
private void testCreateAddAlterDrop(@Nullable String dbName, @Nullable String tableName) throws Exception {
  DatasetId datasetInstanceId = NAMESPACE_ID.dataset("files");
  String hiveTableName = getDatasetHiveName(datasetInstanceId);
  String showTablesCommand = "show tables";

  FileSetProperties.Builder props = FileSetProperties.builder()
    .setBasePath("myPath")
    .setEnableExploreOnCreate(true)
    .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
    .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
    .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
    .setTableProperty("avro.schema.literal", SCHEMA.toString());

  // an explicit table name replaces the default Hive name of the dataset
  if (tableName != null) {
    props.setExploreTableName(tableName);
    hiveTableName = tableName;
  }

  // an explicit database must exist before the dataset is created and qualifies all queries
  String queryTableName = hiveTableName;
  if (dbName != null) {
    props.setExploreDatabaseName(dbName);
    runCommand(NAMESPACE_ID, "create database " + dbName, false, null, null);
    showTablesCommand += " in " + dbName;
    queryTableName = dbName + "." + queryTableName;
  }
  String countQuery = "SELECT count(*) AS count FROM " + queryTableName;

  // create the file set
  datasetFramework.addInstance("fileSet", datasetInstanceId, props.build());

  // the Hive table for this file set must now be listed
  runCommand(NAMESPACE_ID, showTablesCommand, true, null,
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(hiveTableName))));

  // obtain the dataset instance so that data can be written to it
  FileSet fileSet = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
  Assert.assertNotNull(fileSet);

  // add a first file and verify its key-values can be queried with Hive
  FileWriterHelper.generateAvroFile(fileSet.getLocation("file1").getOutputStream(), "a", 0, 3);
  runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName, true,
             Lists.newArrayList(
               new ColumnDesc(hiveTableName + ".key", "STRING", 1, null),
               new ColumnDesc(hiveTableName + ".value", "STRING", 2, null)),
             Lists.newArrayList(
               new QueryResult(Lists.<Object>newArrayList("a0", "#0")),
               new QueryResult(Lists.<Object>newArrayList("a1", "#1")),
               new QueryResult(Lists.<Object>newArrayList("a2", "#2"))));

  // add a second file and verify the row count covers both files
  FileWriterHelper.generateAvroFile(fileSet.getLocation("file2").getOutputStream(), "b", 3, 5);
  runCommand(NAMESPACE_ID, countQuery, true,
             Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(5L))));

  // disable explore by updating the props; the Hive table must disappear
  datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(false).build());
  runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());

  // re-enable explore by updating the props; the data must be queryable again
  datasetFramework.updateInstance(datasetInstanceId, props.setEnableExploreOnCreate(true).build());
  runCommand(NAMESPACE_ID, countQuery, true,
             Lists.newArrayList(new ColumnDesc("count", "BIGINT", 1, null)),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(5L))));

  // switch the explore schema to K_SCHEMA; queries now return the key column only
  datasetFramework.updateInstance(datasetInstanceId,
                                  props.setTableProperty("avro.schema.literal", K_SCHEMA.toString()).build());
  runCommand(NAMESPACE_ID, "SELECT * FROM " + queryTableName + " ORDER BY key", true,
             Lists.newArrayList(new ColumnDesc(hiveTableName + ".key", "STRING", 1, null)),
             Lists.newArrayList(
               new QueryResult(Lists.<Object>newArrayList("a0")),
               new QueryResult(Lists.<Object>newArrayList("a1")),
               new QueryResult(Lists.<Object>newArrayList("a2")),
               new QueryResult(Lists.<Object>newArrayList("b3")),
               new QueryResult(Lists.<Object>newArrayList("b4"))));

  // drop the dataset; the Hive table must be gone afterwards
  datasetFramework.deleteInstance(datasetInstanceId);
  runCommand(NAMESPACE_ID, showTablesCommand, false, null, Collections.<QueryResult>emptyList());

  // drop the database if one was created for this run
  if (dbName != null) {
    runCommand(NAMESPACE_ID, "drop database " + dbName, false, null, null);
  }
}
Aggregations