Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.
From the class HiveExploreTableTestRun, method testInsertFromJoin.
@Test
public void testInsertFromJoin() throws Exception {
  DatasetId userTableID = NAMESPACE_ID.dataset("users");
  DatasetId purchaseTableID = NAMESPACE_ID.dataset("purchases");
  DatasetId expandedTableID = NAMESPACE_ID.dataset("expanded");
  Schema userSchema = Schema.recordOf(
    "user",
    Schema.Field.of("id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("email", Schema.of(Schema.Type.STRING)));
  Schema purchaseSchema = Schema.recordOf(
    "purchase",
    Schema.Field.of("purchaseid", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("itemid", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("userid", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("ct", Schema.of(Schema.Type.INT)),
    Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)));
  Schema expandedSchema = Schema.recordOf(
    "expandedPurchase",
    Schema.Field.of("purchaseid", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("itemid", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("userid", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("ct", Schema.of(Schema.Type.INT)),
    Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)),
    Schema.Field.of("username", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("email", Schema.of(Schema.Type.STRING)));
  datasetFramework.addInstance(Table.class.getName(), userTableID,
    TableProperties.builder().setSchema(userSchema).setRowFieldName("id").build());
  datasetFramework.addInstance(Table.class.getName(), purchaseTableID,
    TableProperties.builder().setSchema(purchaseSchema).setRowFieldName("purchaseid").build());
  datasetFramework.addInstance(Table.class.getName(), expandedTableID,
    TableProperties.builder().setSchema(expandedSchema).setRowFieldName("purchaseid").build());
  Table userTable = datasetFramework.getDataset(userTableID, DatasetDefinition.NO_ARGUMENTS, null);
  Table purchaseTable = datasetFramework.getDataset(purchaseTableID, DatasetDefinition.NO_ARGUMENTS, null);
  TransactionAware txUserTable = (TransactionAware) userTable;
  TransactionAware txPurchaseTable = (TransactionAware) purchaseTable;
  Transaction tx1 = transactionManager.startShort(100);
  txUserTable.startTx(tx1);
  txPurchaseTable.startTx(tx1);
  Put put = new Put(Bytes.toBytes("samuel"));
  put.add("name", "Samuel Jackson");
  put.add("email", "sjackson@gmail.com");
  userTable.put(put);
  put = new Put(Bytes.toBytes(1L));
  put.add("userid", "samuel");
  put.add("itemid", "scotch");
  put.add("ct", 1);
  put.add("price", 56.99d);
  purchaseTable.put(put);
  txUserTable.commitTx();
  txPurchaseTable.commitTx();
  List<byte[]> changes = new ArrayList<>();
  changes.addAll(txUserTable.getTxChanges());
  changes.addAll(txPurchaseTable.getTxChanges());
  transactionManager.canCommit(tx1, changes);
  transactionManager.commit(tx1);
  txUserTable.postTxCommit();
  txPurchaseTable.postTxCommit();
  try {
    String command = String.format(
      "insert into table %s select P.purchaseid, P.itemid, P.userid, P.ct, P.price, U.name, U.email from " +
      "%s P join %s U on (P.userid = U.id)",
      getDatasetHiveName(expandedTableID), getDatasetHiveName(purchaseTableID), getDatasetHiveName(userTableID));
    ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
    Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
    command = String.format("select purchaseid, itemid, userid, ct, price, username, email from %s",
                            getDatasetHiveName(expandedTableID));
    runCommand(NAMESPACE_ID, command, true,
               Lists.newArrayList(new ColumnDesc("purchaseid", "BIGINT", 1, null),
                                  new ColumnDesc("itemid", "STRING", 2, null),
                                  new ColumnDesc("userid", "STRING", 3, null),
                                  new ColumnDesc("ct", "INT", 4, null),
                                  new ColumnDesc("price", "DOUBLE", 5, null),
                                  new ColumnDesc("username", "STRING", 6, null),
                                  new ColumnDesc("email", "STRING", 7, null)),
               Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(
                 1L, "scotch", "samuel", 1, 56.99d, "Samuel Jackson", "sjackson@gmail.com"))));
  } finally {
    datasetFramework.deleteInstance(userTableID);
    datasetFramework.deleteInstance(purchaseTableID);
    datasetFramework.deleteInstance(expandedTableID);
  }
}
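The test above follows the submit-and-wait lifecycle that every example on this page uses: submit a SQL statement, block on the returned future, check the final status, iterate the rows, and close the result. The sketch below condenses that pattern; the helper name runQuery and the two-minute timeout are illustrative choices, but each ExploreExecutionResult call it makes (submit, getStatus, hasNext, next, getColumns, close) appears in the tests on this page.

// Illustrative helper (not part of CDAP): run a query and collect its rows.
private List<List<Object>> runQuery(ExploreClient exploreClient, NamespaceId namespace, String sql) throws Exception {
  ListenableFuture<ExploreExecutionResult> future = exploreClient.submit(namespace, sql);
  // the timeout is an arbitrary choice for this sketch
  ExploreExecutionResult result = future.get(2, TimeUnit.MINUTES);
  try {
    Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
    List<List<Object>> rows = new ArrayList<>();
    while (result.hasNext()) {
      // each QueryResult exposes the row's values as a List<Object>
      rows.add(result.next().getColumns());
    }
    return rows;
  } finally {
    // always release the underlying query handle
    result.close();
  }
}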
Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.
From the class WritableDatasetTestRun, method testTablesWithSpecialChars.
@Test
public void testTablesWithSpecialChars() throws Exception {
  // '.' is replaced with '_' in Hive, so create a dataset with '.' in the name.
  DatasetId myTable1 = NAMESPACE_ID.dataset("dot.table");
  // '-' is also replaced with '_' in Hive, so create a dataset with '-' in the name.
  DatasetId myTable2 = NAMESPACE_ID.dataset("hyphen-table");
  try {
    initKeyValueTable(myTable1, true);
    initKeyValueTable(myTable2, true);
    ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, "select * from dataset_dot_table").get();
    Assert.assertEquals("1", result.next().getColumns().get(0).toString());
    result.close();
    result = exploreClient.submit(NAMESPACE_ID, "select * from dataset_hyphen_table").get();
    Assert.assertEquals("1", result.next().getColumns().get(0).toString());
    result.close();
  } finally {
    datasetFramework.deleteInstance(myTable1);
    datasetFramework.deleteInstance(myTable2);
  }
}
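Both queries above address the datasets by their Hive names, dataset_dot_table and dataset_hyphen_table: Explore prefixes the dataset name with dataset_ and substitutes characters Hive cannot accept, which is why '.' and '-' both end up as '_'. A rough sketch of that mapping, written only to illustrate the naming in this test (it is not the actual CDAP implementation), could look like:

// Illustrative only: approximates how "dot.table" maps to "dataset_dot_table"
// and "hyphen-table" maps to "dataset_hyphen_table" in the queries above.
static String hiveNameFor(String datasetName) {
  return ("dataset_" + datasetName).replaceAll("[.-]", "_");
}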
Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.
From the class WritableDatasetTestRun, method writeIntoNonScannableDataset.
@Test
public void writeIntoNonScannableDataset() throws Exception {
  DatasetId writableTable = NAMESPACE_ID.dataset("writable_table");
  String writableTableName = getDatasetHiveName(writableTable);
  datasetFramework.addModule(keyExtendedStructValueTable, new KeyExtendedStructValueTableDefinition.KeyExtendedStructValueTableModule());
  datasetFramework.addInstance("keyExtendedStructValueTable", extendedTable, DatasetProperties.EMPTY);
  datasetFramework.addModule(writableKeyStructValueTable, new WritableKeyStructValueTableDefinition.KeyStructValueTableModule());
  datasetFramework.addInstance("writableKeyStructValueTable", writableTable, DatasetProperties.EMPTY);
  try {
    // Accessing dataset instance to perform data operations
    KeyExtendedStructValueTableDefinition.KeyExtendedStructValueTable table =
      datasetFramework.getDataset(extendedTable, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    Transaction tx1 = transactionManager.startShort(100);
    table.startTx(tx1);
    KeyExtendedStructValueTableDefinition.KeyExtendedValue value1 =
      new KeyExtendedStructValueTableDefinition.KeyExtendedValue(
        "10", new KeyStructValueTableDefinition.KeyValue.Value("ten", Lists.newArrayList(10, 11, 12)), 20);
    table.put("10", value1);
    Assert.assertEquals(value1, table.get("10"));
    Assert.assertTrue(table.commitTx());
    transactionManager.canCommit(tx1, table.getTxChanges());
    transactionManager.commit(tx1);
    table.postTxCommit();
    String query = "insert into table " + writableTableName + " select key,value from " + extendedTableName;
    ListenableFuture<ExploreExecutionResult> future = exploreClient.submit(NAMESPACE_ID, query);
    ExploreExecutionResult result = future.get();
    result.close();
    KeyStructValueTableDefinition.KeyStructValueTable table2 =
      datasetFramework.getDataset(writableTable, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table2);
    Transaction tx = transactionManager.startShort(100);
    table2.startTx(tx);
    Assert.assertEquals(new KeyStructValueTableDefinition.KeyValue.Value("ten", Lists.newArrayList(10, 11, 12)),
                        table2.get("10_2"));
    Assert.assertTrue(table2.commitTx());
    transactionManager.canCommit(tx, table2.getTxChanges());
    transactionManager.commit(tx);
    table2.postTxCommit();
  } finally {
    datasetFramework.deleteInstance(writableTable);
    datasetFramework.deleteInstance(extendedTable);
    datasetFramework.deleteModule(writableKeyStructValueTable);
    datasetFramework.deleteModule(keyExtendedStructValueTable);
  }
}
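Every write in these tests is wrapped in the same short-transaction choreography: start a transaction, attach it to the TransactionAware dataset, flush the buffered changes with commitTx, let the transaction manager check the change set with canCommit, commit, and finish with postTxCommit. The sketch below condenses that sequence into one place; executeInTx is a hypothetical helper, but the individual calls mirror the ones used above.

// Hypothetical helper wrapping a write to a single TransactionAware dataset
// in the short-transaction sequence used throughout these tests.
private void executeInTx(TransactionAware txAware, Runnable dataOperations) throws Exception {
  Transaction tx = transactionManager.startShort(100);
  txAware.startTx(tx);
  // e.g. table.put(...)
  dataOperations.run();
  // flush the buffered changes and collect the change set for conflict detection
  Assert.assertTrue(txAware.commitTx());
  transactionManager.canCommit(tx, txAware.getTxChanges());
  // make the writes visible and let the dataset clean up
  transactionManager.commit(tx);
  txAware.postTxCommit();
}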
Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.
From the class WritableDatasetTestRun, method writeFromAnotherNamespace.
@Test
public void writeFromAnotherNamespace() throws Exception {
  datasetFramework.addModule(kvTable, new KeyValueTableDefinition.KeyValueTableModule());
  datasetFramework.addInstance("kvTable", simpleTable, DatasetProperties.EMPTY);
  datasetFramework.addModule(otherKvTable, new KeyValueTableDefinition.KeyValueTableModule());
  datasetFramework.addInstance("kvTable", otherSimpleTable, DatasetProperties.EMPTY);
  try {
    ExploreExecutionResult result = exploreClient.submit(OTHER_NAMESPACE_ID, "select * from " + simpleTableName).get();
    Assert.assertFalse(result.hasNext());
    // Accessing dataset instance to perform data operations
    KeyValueTableDefinition.KeyValueTable table =
      datasetFramework.getDataset(simpleTable, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    Transaction tx = transactionManager.startShort(100);
    table.startTx(tx);
    table.put(1, "one");
    Assert.assertTrue(table.commitTx());
    transactionManager.canCommit(tx, table.getTxChanges());
    transactionManager.commit(tx);
    table.postTxCommit();
    String query = String.format("insert into table %s select * from cdap_namespace.%s",
                                 otherSimpleTableName, simpleTableName);
    exploreClient.submit(OTHER_NAMESPACE_ID, query).get().close();
    assertSelectAll(NAMESPACE_ID, simpleTableName,
                    ImmutableList.<List<Object>>of(ImmutableList.<Object>of(1, "one")));
    // Write into otherSimpleTable and assert that it doesn't show up in queries over simpleTable
    table = datasetFramework.getDataset(otherSimpleTable, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    tx = transactionManager.startShort(100);
    table.startTx(tx);
    table.put(2, "two");
    Assert.assertTrue(table.commitTx());
    transactionManager.canCommit(tx, table.getTxChanges());
    transactionManager.commit(tx);
    table.postTxCommit();
    assertSelectAll(OTHER_NAMESPACE_ID, otherSimpleTableName,
                    ImmutableList.<List<Object>>of(ImmutableList.<Object>of(1, "one"), ImmutableList.<Object>of(2, "two")));
    assertSelectAll(NAMESPACE_ID, simpleTableName,
                    ImmutableList.<List<Object>>of(ImmutableList.<Object>of(1, "one")));
  } finally {
    datasetFramework.deleteInstance(simpleTable);
    datasetFramework.deleteInstance(otherSimpleTable);
    datasetFramework.deleteModule(kvTable);
    datasetFramework.deleteModule(otherKvTable);
  }
}
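The step worth noting in this test is the cross-namespace insert: the query is submitted against OTHER_NAMESPACE_ID but reads from a table in the default namespace by qualifying it with that namespace's Hive database name (cdap_namespace here). Isolated from the rest of the test, that step looks like the following; the table-name variables are the same ones the test uses.

// Copy every row of a table in the default namespace ("cdap_namespace" in Hive)
// into a table owned by the namespace the query is submitted against.
String query = String.format("insert into table %s select * from cdap_namespace.%s",
                             otherSimpleTableName, simpleTableName);
ExploreExecutionResult result = exploreClient.submit(OTHER_NAMESPACE_ID, query).get();
// insert statements return no rows, but the handle still needs to be closed
result.close();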
Use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.
From the class GetStreamStatsCommand, method perform.
@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
  long currentTime = System.currentTimeMillis();
  StreamId streamId = cliConfig.getCurrentNamespace().stream(arguments.get(ArgumentName.STREAM.toString()));
  // clamp the limit to [1, MAX_LIMIT]
  Integer limitInput = arguments.getIntOptional(ArgumentName.LIMIT.toString(), DEFAULT_LIMIT);
  // we know we're passing a non-null default, so limitInput should never be null.
  Preconditions.checkNotNull(limitInput);
  int limit = Math.max(1, Math.min(MAX_LIMIT, limitInput));
  long startTime = getTimestamp(arguments.getOptional(ArgumentName.START_TIME.toString(), "min"), currentTime);
  long endTime = getTimestamp(arguments.getOptional(ArgumentName.END_TIME.toString(), "max"), currentTime);
  // hack to validate streamId
  StreamProperties config = streamClient.getConfig(streamId);
  if (config.getFormat().getName().equals("text")) {
    output.printf("No schema found for stream '%s'", streamId.getEntityName());
    output.println();
    return;
  }
  // build processorMap: Hive column name -> StatsProcessor
  Map<String, Set<StatsProcessor>> processorMap = new HashMap<>();
  Schema streamSchema = config.getFormat().getSchema();
  for (Schema.Field field : streamSchema.getFields()) {
    Schema fieldSchema = field.getSchema();
    String hiveColumnName = cdapSchemaColumName2HiveColumnName(streamId, field.getName());
    processorMap.put(hiveColumnName, getProcessorsForType(fieldSchema.getType(), fieldSchema.getUnionSchemas()));
  }
  // get a list of stream events and calculate various statistics about the events
  String timestampCol = getTimestampHiveColumn(streamId);
  ListenableFuture<ExploreExecutionResult> resultsFuture = queryClient.execute(
    streamId.getParent(),
    "SELECT * FROM " + getHiveTableName(streamId) + " WHERE " + timestampCol +
    " BETWEEN " + startTime + " AND " + endTime + " LIMIT " + limit);
  ExploreExecutionResult results = resultsFuture.get(1, TimeUnit.MINUTES);
  List<ColumnDesc> schema = results.getResultSchema();
  // apply StatsProcessors to every element in every row
  int rows = 0;
  while (results.hasNext()) {
    rows++;
    QueryResult row = results.next();
    for (int i = 0; i < row.getColumns().size(); i++) {
      Object column = row.getColumns().get(i);
      ColumnDesc columnDesc = schema.get(i);
      String columnName = columnDesc.getName();
      if (isUserHiveColumn(streamId, columnName)) {
        Set<StatsProcessor> processors = processorMap.get(columnName);
        if (processors != null) {
          for (StatsProcessor processor : processors) {
            processor.process(column);
          }
        }
      }
    }
  }
  // print report
  for (ColumnDesc columnDesc : schema) {
    if (isUserHiveColumn(streamId, columnDesc.getName())) {
      String truncatedColumnName = getTruncatedColumnName(streamId, columnDesc.getName());
      output.printf("column: %s, type: %s", truncatedColumnName, columnDesc.getType());
      output.println();
      Set<StatsProcessor> processors = processorMap.get(columnDesc.getName());
      if (processors != null && !processors.isEmpty()) {
        for (StatsProcessor processor : processors) {
          processor.printReport(output);
        }
        output.println();
      } else {
        output.println("No statistics available");
        output.println();
      }
    }
  }
  output.printf("Analyzed %d Stream events in the time range [%d, %d]...", rows, startTime, endTime);
  output.println();
  output.println();
}
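The command feeds every cell of the result set to a set of StatsProcessor instances and then asks each one to print a report. Only two calls on that type are visible here, process(Object) and printReport(PrintStream), so the class below is a hypothetical illustration of the shape those calls imply, not the CDAP CLI's actual implementation: a processor that simply counts non-null values.

// Hypothetical processor matching the two calls the command makes above;
// the real StatsProcessor implementations live inside the CDAP CLI and may differ.
class NonNullCountProcessor {
  private long nonNull;
  private long total;

  void process(Object column) {
    total++;
    if (column != null) {
      nonNull++;
    }
  }

  void printReport(PrintStream output) {
    output.printf("Non-null values: %d of %d", nonNull, total);
    output.println();
  }
}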