Use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.
From the class HiveExploreStructuredRecordTestRun, method testSelectStar.
@Test
public void testSelectStar() throws Exception {
  List<ColumnDesc> expectedSchema = Lists.newArrayList(
      new ColumnDesc(MY_TABLE_NAME + ".id", "STRING", 1, null),
      new ColumnDesc(MY_TABLE_NAME + ".subject", "STRING", 2, null),
      new ColumnDesc(MY_TABLE_NAME + ".body", "STRING", 3, null),
      new ColumnDesc(MY_TABLE_NAME + ".sender", "STRING", 4, null));
  ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from " + MY_TABLE_NAME).get();
  // check schema
  Assert.assertEquals(expectedSchema, results.getResultSchema());
  List<Object> columns = results.next().getColumns();
  // check results
  Assert.assertEquals("email1", columns.get(0));
  Assert.assertEquals("this is the subject", columns.get(1));
  Assert.assertEquals("this is the body", columns.get(2));
  Assert.assertEquals("sljackson@boss.com", columns.get(3));
  // there should be no more rows
  Assert.assertFalse(results.hasNext());
}
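ExploreExecutionResult behaves as both an Iterator<QueryResult> and a Closeable: the test above walks rows with hasNext()/next(), and later snippets call close(). Building on that, a caller that only needs the raw rows can drain and release the result in one place. A minimal sketch, reusing the exploreClient and NAMESPACE_ID fixtures from the test above (java.util.ArrayList and java.util.List imports assumed); fetchAllRows is a hypothetical helper name, not part of the CDAP API:

// Hypothetical helper: drains every row of a query into memory and closes the result.
// try-with-resources works because ExploreExecutionResult extends Closeable.
private List<List<Object>> fetchAllRows(String query) throws Exception {
  try (ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, query).get()) {
    List<List<Object>> rows = new ArrayList<>();
    while (results.hasNext()) {
      rows.add(results.next().getColumns());
    }
    return rows;
  }
}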
Use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.
From the class HiveExploreTableTestRun, method testTableWithDateTimestamp.
@Test
public void testTableWithDateTimestamp() throws Exception {
  TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
  DatasetId dtTsTable = NAMESPACE_ID.dataset("dt_ts_table");
  DatasetId otherDtTsTable = NAMESPACE_ID.dataset("other_dt_ts_table");
  Schema schema = Schema.recordOf(
      "recordWithDateTimestamp",
      Schema.Field.of("int_field", Schema.of(Schema.Type.INT)),
      Schema.Field.of("string_field", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
      Schema.Field.of("date_field", Schema.nullableOf(Schema.of(Schema.LogicalType.DATE))),
      Schema.Field.of("ts_millis_field", Schema.nullableOf(Schema.of(Schema.LogicalType.TIMESTAMP_MILLIS))),
      Schema.Field.of("ts_micros_field", Schema.nullableOf(Schema.of(Schema.LogicalType.TIMESTAMP_MICROS))));
  datasetFramework.addInstance(Table.class.getName(), dtTsTable,
                               TableProperties.builder()
                                 .setSchema(schema)
                                 .setRowFieldName("int_field")
                                 .setExploreTableName("dt_ts_table")
                                 .build());
  datasetFramework.addInstance(Table.class.getName(), otherDtTsTable,
                               TableProperties.builder()
                                 .setSchema(schema)
                                 .setRowFieldName("int_field")
                                 .setExploreTableName("other_dt_ts_table")
                                 .build());
  try {
    // Accessing dataset instance to perform data operations
    Table table = datasetFramework.getDataset(dtTsTable, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    Transaction tx = transactionManager.startShort(100);
    ((TransactionAware) table).startTx(tx);
    Put put = new Put(Bytes.toBytes("row1"));
    put.add("int_field", 1);
    put.add("string_field", "alice");
    put.add("date_field", 0);
    put.add("ts_millis_field", 1536336590595L);
    put.add("ts_micros_field", 1536336590595123L);
    table.put(put);
    put = new Put(Bytes.toBytes("row2"));
    put.add("int_field", 2);
    put.add("string_field", "bob");
    table.put(put);
    ((TransactionAware) table).commitTx();
    transactionManager.canCommit(tx.getTransactionId(), ((TransactionAware) table).getTxChanges());
    transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
    ((TransactionAware) table).postTxCommit();
    ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from dt_ts_table").get();
    List<Object> columns = results.next().getColumns();
    Assert.assertEquals(5, columns.size());
    Assert.assertEquals("alice", columns.get(1));
    Assert.assertEquals("1970-01-01", columns.get(2));
    Assert.assertEquals("2018-09-07 16:09:50.595", columns.get(3));
    Assert.assertEquals("2018-09-07 16:09:50.595123", columns.get(4));
    columns = results.next().getColumns();
    Assert.assertEquals(5, columns.size());
    Assert.assertEquals("bob", columns.get(1));
    Assert.assertNull(columns.get(2));
    Assert.assertNull(columns.get(3));
    Assert.assertNull(columns.get(4));
    String command = "insert into other_dt_ts_table select int_field, string_field, date_field, ts_millis_field, "
      + "ts_micros_field from dt_ts_table";
    ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
    Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
    command = "select string_field, date_field, ts_millis_field, ts_micros_field from other_dt_ts_table";
    runCommand(NAMESPACE_ID, command, true,
               Lists.newArrayList(new ColumnDesc("string_field", "STRING", 1, null),
                                  new ColumnDesc("date_field", "DATE", 2, null),
                                  new ColumnDesc("ts_millis_field", "TIMESTAMP", 3, null),
                                  new ColumnDesc("ts_micros_field", "TIMESTAMP", 4, null)),
               Lists.newArrayList(
                   new QueryResult(Lists.newArrayList("alice", "1970-01-01", "2018-09-07 16:09:50.595",
                                                      "2018-09-07 16:09:50.595123")),
                   new QueryResult(Lists.newArrayList("bob", null, null, null))));
  } finally {
    datasetFramework.deleteInstance(dtTsTable);
    datasetFramework.deleteInstance(otherDtTsTable);
  }
}
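The expected strings follow directly from the epoch values written above once the default time zone is pinned to UTC: day 0 is 1970-01-01, and both timestamp values land on 2018-09-07T16:09:50 UTC. A standalone java.time check of that arithmetic (a side sketch, not part of the test itself):

import java.time.Instant;
import java.time.LocalDate;

public class EpochCheck {
  public static void main(String[] args) {
    // date_field = 0 means zero days since the epoch
    System.out.println(LocalDate.ofEpochDay(0));                  // 1970-01-01
    // ts_millis_field = 1536336590595 ms since the epoch
    System.out.println(Instant.ofEpochMilli(1536336590595L));     // 2018-09-07T16:09:50.595Z
    // ts_micros_field = 1536336590595123 µs: split into whole seconds plus a nano adjustment
    System.out.println(Instant.ofEpochSecond(1536336590L, 595123L * 1000L)); // 2018-09-07T16:09:50.595123Z
  }
}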
Use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.
From the class HiveExploreObjectMappedTableTestRun, method testSelectStar.
public void testSelectStar(String tableToQuery, String tableInSchema) throws Exception {
  List<ColumnDesc> expectedSchema = Lists.newArrayList(
      new ColumnDesc(tableInSchema + ".row_key", "STRING", 1, null),
      new ColumnDesc(tableInSchema + ".bytearrayfield", "BINARY", 2, null),
      new ColumnDesc(tableInSchema + ".doublefield", "DOUBLE", 3, null),
      new ColumnDesc(tableInSchema + ".floatfield", "FLOAT", 4, null),
      new ColumnDesc(tableInSchema + ".intfield", "INT", 5, null),
      new ColumnDesc(tableInSchema + ".longfield", "BIGINT", 6, null),
      new ColumnDesc(tableInSchema + ".stringfield", "STRING", 7, null));
  ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from " + tableToQuery).get();
  // check schema
  Assert.assertEquals(expectedSchema, results.getResultSchema());
  List<Object> columns = results.next().getColumns();
  // check record1
  Assert.assertEquals("123", columns.get(0));
  Assert.assertArrayEquals(record1.byteArrayField, (byte[]) columns.get(1));
  Assert.assertTrue(Math.abs(record1.doubleField - (Double) columns.get(2)) < 0.000001);
  // FLOAT columns come back from Hive as doubles, hence the tolerance comparison
  Assert.assertTrue(Math.abs(record1.floatField - (Double) columns.get(3)) < 0.000001);
  Assert.assertEquals(record1.intField, columns.get(4));
  Assert.assertEquals(record1.longField, columns.get(5));
  Assert.assertEquals(record1.stringField, columns.get(6));
  // check record2
  columns = results.next().getColumns();
  Assert.assertEquals("456", columns.get(0));
  Assert.assertArrayEquals(record2.byteArrayField, (byte[]) columns.get(1));
  Assert.assertTrue(Math.abs(record2.doubleField - (Double) columns.get(2)) < 0.000001);
  Assert.assertTrue(Math.abs(record2.floatField - (Double) columns.get(3)) < 0.000001);
  Assert.assertEquals(record2.intField, columns.get(4));
  Assert.assertEquals(record2.longField, columns.get(5));
  Assert.assertEquals(record2.stringField, columns.get(6));
  // there should be no more rows
  Assert.assertFalse(results.hasNext());
}
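The manual Math.abs tolerance checks can equivalently use JUnit's assertEquals(double expected, double actual, double delta) overload, which reports both values when the assertion fails. An alternative sketch for the record1 checks, under the same assumptions as the method above:

// Equivalent tolerance checks using JUnit's assertEquals(double, double, double):
Assert.assertEquals(record1.doubleField, (Double) columns.get(2), 0.000001);
// the float field widens to double on the expected side to match Hive's result type
Assert.assertEquals(record1.floatField, (Double) columns.get(3), 0.000001);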
Use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.
From the class HiveExploreServiceFileSetTestRun, method testOrcFileset.
@Test
public void testOrcFileset() throws Exception {
  final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("orcfiles");
  final String tableName = getDatasetHiveName(datasetInstanceId);
  // create a file set backed by the ORC format
  datasetFramework.addInstance("fileSet", datasetInstanceId,
                               FileSetProperties.builder()
                                 .setEnableExploreOnCreate(true)
                                 .setSerDe("org.apache.hadoop.hive.ql.io.orc.OrcSerde")
                                 .setExploreInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")
                                 .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")
                                 .setExploreSchema("id int, name string")
                                 .build());
  // verify that the Hive table was created for this file set
  runCommand(NAMESPACE_ID, "show tables", true,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(tableName))));
  // insert data into the table
  ExploreExecutionResult result = exploreClient.submit(
      NAMESPACE_ID, String.format("insert into table %s values (1, 'samuel'), (2, 'dwayne')", tableName)).get();
  result.close();
  // verify that we can query the rows in the file set with Hive
  runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true,
             Lists.newArrayList(new ColumnDesc(tableName + ".id", "INT", 1, null),
                                new ColumnDesc(tableName + ".name", "STRING", 2, null)),
             Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(1, "samuel")),
                                new QueryResult(Lists.<Object>newArrayList(2, "dwayne"))));
  // drop the dataset
  datasetFramework.deleteInstance(datasetInstanceId);
  // verify the Hive table is gone
  runCommand(NAMESPACE_ID, "show tables", false,
             Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, "from deserializer")),
             Collections.<QueryResult>emptyList());
}
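runCommand is a helper inherited from the shared Explore test base class; its body is not shown in these excerpts. From the call sites, a plausible reconstruction looks like the following (a hypothetical sketch inferred from usage, not the actual CDAP implementation):

// Hypothetical reconstruction of the runCommand(...) helper used throughout these tests:
// submit the query, then compare the schema and each expected row against the result.
private void runCommand(NamespaceId namespace, String command, boolean expectsResults,
                        List<ColumnDesc> expectedSchema, List<QueryResult> expectedResults) throws Exception {
  try (ExploreExecutionResult results = exploreClient.submit(namespace, command).get()) {
    Assert.assertEquals(expectsResults, results.hasNext());
    Assert.assertEquals(expectedSchema, results.getResultSchema());
    for (QueryResult expected : expectedResults) {
      Assert.assertEquals(expected.getColumns(), results.next().getColumns());
    }
    Assert.assertFalse(results.hasNext());
  }
}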
Use of io.cdap.cdap.explore.client.ExploreExecutionResult in project cdap by cdapio.
From the class WritableDatasetTestRun, method assertSelectAll.
private void assertSelectAll(NamespaceId namespace, String table, List<List<Object>> expectedResults) throws Exception {
  ExploreExecutionResult result = exploreClient.submit(namespace, "select * from " + table).get();
  for (List<Object> expectedResult : expectedResults) {
    Assert.assertEquals(expectedResult, result.next().getColumns());
  }
  Assert.assertFalse(result.hasNext());
  result.close();
}
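Because ExploreExecutionResult is Closeable, the same helper can lean on try-with-resources so the result is released even when an assertion throws mid-loop. A minimal variant sketch of the method above:

// Variant of assertSelectAll that closes the result even if an assertion fails.
private void assertSelectAll(NamespaceId namespace, String table, List<List<Object>> expectedResults) throws Exception {
  try (ExploreExecutionResult result = exploreClient.submit(namespace, "select * from " + table).get()) {
    for (List<Object> expectedResult : expectedResults) {
      Assert.assertEquals(expectedResult, result.next().getColumns());
    }
    Assert.assertFalse(result.hasNext());
  }
}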