
Example 21 with ExploreExecutionResult

use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

the class HiveExploreTableTestRun method testInsertFromJoin.

@Test
public void testInsertFromJoin() throws Exception {
    DatasetId userTableID = NAMESPACE_ID.dataset("users");
    DatasetId purchaseTableID = NAMESPACE_ID.dataset("purchases");
    DatasetId expandedTableID = NAMESPACE_ID.dataset("expanded");
    Schema userSchema = Schema.recordOf(
        "user",
        Schema.Field.of("id", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("email", Schema.of(Schema.Type.STRING)));
    Schema purchaseSchema = Schema.recordOf(
        "purchase",
        Schema.Field.of("purchaseid", Schema.of(Schema.Type.LONG)),
        Schema.Field.of("itemid", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("userid", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("ct", Schema.of(Schema.Type.INT)),
        Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)));
    Schema expandedSchema = Schema.recordOf(
        "expandedPurchase",
        Schema.Field.of("purchaseid", Schema.of(Schema.Type.LONG)),
        Schema.Field.of("itemid", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("userid", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("ct", Schema.of(Schema.Type.INT)),
        Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)),
        Schema.Field.of("username", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("email", Schema.of(Schema.Type.STRING)));
    datasetFramework.addInstance(Table.class.getName(), userTableID, TableProperties.builder().setSchema(userSchema).setRowFieldName("id").build());
    datasetFramework.addInstance(Table.class.getName(), purchaseTableID, TableProperties.builder().setSchema(purchaseSchema).setRowFieldName("purchaseid").build());
    datasetFramework.addInstance(Table.class.getName(), expandedTableID, TableProperties.builder().setSchema(expandedSchema).setRowFieldName("purchaseid").build());
    Table userTable = datasetFramework.getDataset(userTableID, DatasetDefinition.NO_ARGUMENTS, null);
    Table purchaseTable = datasetFramework.getDataset(purchaseTableID, DatasetDefinition.NO_ARGUMENTS, null);
    TransactionAware txUserTable = (TransactionAware) userTable;
    TransactionAware txPurchaseTable = (TransactionAware) purchaseTable;
    Transaction tx1 = transactionManager.startShort(100);
    txUserTable.startTx(tx1);
    txPurchaseTable.startTx(tx1);
    Put put = new Put(Bytes.toBytes("samuel"));
    put.add("name", "Samuel Jackson");
    put.add("email", "sjackson@gmail.com");
    userTable.put(put);
    put = new Put(Bytes.toBytes(1L));
    put.add("userid", "samuel");
    put.add("itemid", "scotch");
    put.add("ct", 1);
    put.add("price", 56.99d);
    purchaseTable.put(put);
    txUserTable.commitTx();
    txPurchaseTable.commitTx();
    List<byte[]> changes = new ArrayList<>();
    changes.addAll(txUserTable.getTxChanges());
    changes.addAll(txPurchaseTable.getTxChanges());
    transactionManager.canCommit(tx1, changes);
    transactionManager.commit(tx1);
    txUserTable.postTxCommit();
    txPurchaseTable.postTxCommit();
    try {
        String command = String.format("insert into table %s select P.purchaseid, P.itemid, P.userid, P.ct, P.price, U.name, U.email from " + "%s P join %s U on (P.userid = U.id)", getDatasetHiveName(expandedTableID), getDatasetHiveName(purchaseTableID), getDatasetHiveName(userTableID));
        ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
        Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
        command = String.format("select purchaseid, itemid, userid, ct, price, username, email from %s", getDatasetHiveName(expandedTableID));
        runCommand(NAMESPACE_ID, command, true,
            Lists.newArrayList(
                new ColumnDesc("purchaseid", "BIGINT", 1, null),
                new ColumnDesc("itemid", "STRING", 2, null),
                new ColumnDesc("userid", "STRING", 3, null),
                new ColumnDesc("ct", "INT", 4, null),
                new ColumnDesc("price", "DOUBLE", 5, null),
                new ColumnDesc("username", "STRING", 6, null),
                new ColumnDesc("email", "STRING", 7, null)),
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(
                1L, "scotch", "samuel", 1, 56.99d, "Samuel Jackson", "sjackson@gmail.com"))));
    } finally {
        datasetFramework.deleteInstance(userTableID);
        datasetFramework.deleteInstance(purchaseTableID);
        datasetFramework.deleteInstance(expandedTableID);
    }
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) Table(co.cask.cdap.api.dataset.table.Table) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Put(co.cask.cdap.api.dataset.table.Put) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)
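Distilled from the test above, the core Explore pattern is: build the Hive command, submit it, block on the returned future, and check the final status. A minimal sketch of that pattern, assuming an already-configured ExploreClient and NamespaceId; the helper class and method names are illustrative, not part of the test:

import co.cask.cdap.explore.client.ExploreClient;
import co.cask.cdap.explore.client.ExploreExecutionResult;
import co.cask.cdap.proto.QueryStatus;
import co.cask.cdap.proto.id.NamespaceId;

public final class InsertFromJoinSketch {

    // Hypothetical helper: runs a Hive INSERT ... SELECT join and verifies it finished.
    static void insertFromJoin(ExploreClient exploreClient, NamespaceId namespace,
                               String targetTable, String purchaseTable, String userTable) throws Exception {
        String command = String.format(
            "insert into table %s select P.purchaseid, P.itemid, P.userid, P.ct, P.price, U.name, U.email " +
                "from %s P join %s U on (P.userid = U.id)",
            targetTable, purchaseTable, userTable);
        // submit() is asynchronous; get() blocks until the Hive query completes.
        // ExploreExecutionResult is Closeable, so try-with-resources releases it.
        try (ExploreExecutionResult result = exploreClient.submit(namespace, command).get()) {
            if (result.getStatus().getStatus() != QueryStatus.OpStatus.FINISHED) {
                throw new IllegalStateException("Query did not finish: " + result.getStatus());
            }
        }
    }
}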

Example 22 with ExploreExecutionResult

use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

the class WritableDatasetTestRun method testTablesWithSpecialChars.

@Test
public void testTablesWithSpecialChars() throws Exception {
    // '.' is replaced with '_' in Hive, so create a dataset with a '.' in its name.
    DatasetId myTable1 = NAMESPACE_ID.dataset("dot.table");
    // '-' is replaced with '_' in Hive, so create a dataset with a '-' in its name.
    DatasetId myTable2 = NAMESPACE_ID.dataset("hyphen-table");
    try {
        initKeyValueTable(myTable1, true);
        initKeyValueTable(myTable2, true);
        ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, "select * from dataset_dot_table").get();
        Assert.assertEquals("1", result.next().getColumns().get(0).toString());
        result.close();
        result = exploreClient.submit(NAMESPACE_ID, "select * from dataset_hyphen_table").get();
        Assert.assertEquals("1", result.next().getColumns().get(0).toString());
        result.close();
    } finally {
        datasetFramework.deleteInstance(myTable1);
        datasetFramework.deleteInstance(myTable2);
    }
}
Also used : ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)
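The test drains one cell per query and closes each result by hand. Since ExploreExecutionResult is an Iterator<QueryResult> that also implements Closeable, the same iterate-and-close idiom can be wrapped in try-with-resources; a minimal sketch, where the firstColumn helper is hypothetical:

import co.cask.cdap.explore.client.ExploreClient;
import co.cask.cdap.explore.client.ExploreExecutionResult;
import co.cask.cdap.proto.QueryResult;
import co.cask.cdap.proto.id.NamespaceId;
import java.util.ArrayList;
import java.util.List;

public final class FirstColumnSketch {

    // Hypothetical helper: collects the first column of every row as strings.
    static List<String> firstColumn(ExploreClient exploreClient, NamespaceId namespace,
                                    String hiveTable) throws Exception {
        List<String> values = new ArrayList<>();
        // try-with-resources closes the result even if iteration fails
        try (ExploreExecutionResult result = exploreClient.submit(namespace, "select * from " + hiveTable).get()) {
            while (result.hasNext()) {
                QueryResult row = result.next();
                values.add(String.valueOf(row.getColumns().get(0)));
            }
        }
        return values;
    }
}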

Example 23 with ExploreExecutionResult

use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

the class WritableDatasetTestRun method writeIntoNonScannableDataset.

@Test
public void writeIntoNonScannableDataset() throws Exception {
    DatasetId writableTable = NAMESPACE_ID.dataset("writable_table");
    String writableTableName = getDatasetHiveName(writableTable);
    datasetFramework.addModule(keyExtendedStructValueTable, new KeyExtendedStructValueTableDefinition.KeyExtendedStructValueTableModule());
    datasetFramework.addInstance("keyExtendedStructValueTable", extendedTable, DatasetProperties.EMPTY);
    datasetFramework.addModule(writableKeyStructValueTable, new WritableKeyStructValueTableDefinition.KeyStructValueTableModule());
    datasetFramework.addInstance("writableKeyStructValueTable", writableTable, DatasetProperties.EMPTY);
    try {
        // Accessing dataset instance to perform data operations
        KeyExtendedStructValueTableDefinition.KeyExtendedStructValueTable table = datasetFramework.getDataset(extendedTable, DatasetDefinition.NO_ARGUMENTS, null);
        Assert.assertNotNull(table);
        Transaction tx1 = transactionManager.startShort(100);
        table.startTx(tx1);
        KeyExtendedStructValueTableDefinition.KeyExtendedValue value1 = new KeyExtendedStructValueTableDefinition.KeyExtendedValue("10", new KeyStructValueTableDefinition.KeyValue.Value("ten", Lists.newArrayList(10, 11, 12)), 20);
        table.put("10", value1);
        Assert.assertEquals(value1, table.get("10"));
        Assert.assertTrue(table.commitTx());
        transactionManager.canCommit(tx1, table.getTxChanges());
        transactionManager.commit(tx1);
        table.postTxCommit();
        String query = "insert into table " + writableTableName + " select key,value from " + extendedTableName;
        ListenableFuture<ExploreExecutionResult> future = exploreClient.submit(NAMESPACE_ID, query);
        ExploreExecutionResult result = future.get();
        result.close();
        KeyStructValueTableDefinition.KeyStructValueTable table2 = datasetFramework.getDataset(writableTable, DatasetDefinition.NO_ARGUMENTS, null);
        Assert.assertNotNull(table2);
        Transaction tx = transactionManager.startShort(100);
        table2.startTx(tx);
        // the insert query should have written the row under the key "10_2"
        Assert.assertEquals(new KeyStructValueTableDefinition.KeyValue.Value("ten", Lists.newArrayList(10, 11, 12)), table2.get("10_2"));
        Assert.assertTrue(table2.commitTx());
        transactionManager.canCommit(tx, table2.getTxChanges());
        transactionManager.commit(tx);
        table2.postTxCommit();
    } finally {
        datasetFramework.deleteInstance(writableTable);
        datasetFramework.deleteInstance(extendedTable);
        datasetFramework.deleteModule(writableKeyStructValueTable);
        datasetFramework.deleteModule(keyExtendedStructValueTable);
    }
}
Also used : KeyStructValueTableDefinition(co.cask.cdap.explore.service.datasets.KeyStructValueTableDefinition) WritableKeyStructValueTableDefinition(co.cask.cdap.explore.service.datasets.WritableKeyStructValueTableDefinition) KeyExtendedStructValueTableDefinition(co.cask.cdap.explore.service.datasets.KeyExtendedStructValueTableDefinition) DatasetId(co.cask.cdap.proto.id.DatasetId) Transaction(org.apache.tephra.Transaction) WritableKeyStructValueTableDefinition(co.cask.cdap.explore.service.datasets.WritableKeyStructValueTableDefinition) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)
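Each of these tests repeats the same client-side Tephra lifecycle around its data operations: startShort, startTx, work, commitTx, canCommit, commit, postTxCommit. A minimal sketch that factors that lifecycle into one helper, assuming a TransactionManager and a TransactionAware dataset; writeInTx itself is illustrative, not CDAP or Tephra API:

import java.util.Collection;
import org.apache.tephra.Transaction;
import org.apache.tephra.TransactionAware;
import org.apache.tephra.TransactionManager;

public final class TxLifecycleSketch {

    // Hypothetical helper: runs `work` against a TransactionAware dataset inside one short transaction.
    static void writeInTx(TransactionManager txManager, TransactionAware dataset, Runnable work) throws Exception {
        Transaction tx = txManager.startShort(100);     // 100-second timeout, as in the tests above
        dataset.startTx(tx);                            // bind the dataset's buffered operations to tx
        try {
            work.run();
            if (!dataset.commitTx()) {                  // flush the dataset's buffered writes
                throw new IllegalStateException("commitTx failed");
            }
            Collection<byte[]> changes = dataset.getTxChanges();
            txManager.canCommit(tx, changes);           // conflict detection (return value ignored, as in the tests)
            txManager.commit(tx);                       // make the writes visible to later transactions
            dataset.postTxCommit();                     // let the dataset drop its per-tx state
        } catch (Exception e) {
            dataset.rollbackTx();                       // undo buffered writes on failure
            txManager.abort(tx);
            throw e;
        }
    }
}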

Example 24 with ExploreExecutionResult

use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

the class WritableDatasetTestRun method writeFromAnotherNamespace.

@Test
public void writeFromAnotherNamespace() throws Exception {
    datasetFramework.addModule(kvTable, new KeyValueTableDefinition.KeyValueTableModule());
    datasetFramework.addInstance("kvTable", simpleTable, DatasetProperties.EMPTY);
    datasetFramework.addModule(otherKvTable, new KeyValueTableDefinition.KeyValueTableModule());
    datasetFramework.addInstance("kvTable", otherSimpleTable, DatasetProperties.EMPTY);
    try {
        ExploreExecutionResult result = exploreClient.submit(OTHER_NAMESPACE_ID, "select * from " + simpleTableName).get();
        Assert.assertFalse(result.hasNext());
        // Accessing dataset instance to perform data operations
        KeyValueTableDefinition.KeyValueTable table = datasetFramework.getDataset(simpleTable, DatasetDefinition.NO_ARGUMENTS, null);
        Assert.assertNotNull(table);
        Transaction tx = transactionManager.startShort(100);
        table.startTx(tx);
        table.put(1, "one");
        Assert.assertTrue(table.commitTx());
        transactionManager.canCommit(tx, table.getTxChanges());
        transactionManager.commit(tx);
        table.postTxCommit();
        String query = String.format("insert into table %s select * from cdap_namespace.%s", otherSimpleTableName, simpleTableName);
        exploreClient.submit(OTHER_NAMESPACE_ID, query).get().close();
        assertSelectAll(NAMESPACE_ID, simpleTableName, ImmutableList.<List<Object>>of(ImmutableList.<Object>of(1, "one")));
        // Write into otherSimpleTable and assert that it doesn't show up in queries over simpleTable
        table = datasetFramework.getDataset(otherSimpleTable, DatasetDefinition.NO_ARGUMENTS, null);
        Assert.assertNotNull(table);
        tx = transactionManager.startShort(100);
        table.startTx(tx);
        table.put(2, "two");
        Assert.assertTrue(table.commitTx());
        transactionManager.canCommit(tx, table.getTxChanges());
        transactionManager.commit(tx);
        table.postTxCommit();
        assertSelectAll(OTHER_NAMESPACE_ID, otherSimpleTableName, ImmutableList.<List<Object>>of(ImmutableList.<Object>of(1, "one"), ImmutableList.<Object>of(2, "two")));
        assertSelectAll(NAMESPACE_ID, simpleTableName, ImmutableList.<List<Object>>of(ImmutableList.<Object>of(1, "one")));
    } finally {
        datasetFramework.deleteInstance(simpleTable);
        datasetFramework.deleteInstance(otherSimpleTable);
        datasetFramework.deleteModule(kvTable);
        datasetFramework.deleteModule(otherKvTable);
    }
}
Also used : Transaction(org.apache.tephra.Transaction) KeyValueTableDefinition(co.cask.cdap.explore.service.datasets.KeyValueTableDefinition) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)
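The cross-namespace read works because each CDAP namespace is exposed to Hive as its own database, so a query running in OTHER_NAMESPACE_ID can still name a table in NAMESPACE_ID as cdap_namespace.<table>. A hedged sketch of that pattern, assuming the Hive database for a CDAP namespace is named cdap_<namespace> as in the test's literal; the helper itself is illustrative:

import co.cask.cdap.explore.client.ExploreClient;
import co.cask.cdap.proto.id.NamespaceId;

public final class CrossNamespaceInsertSketch {

    // Hypothetical helper: copies all rows from a table in another namespace into a
    // table resolved in the namespace the query runs in. Assumes Hive databases for
    // CDAP namespaces are named "cdap_<namespace>", matching the test's "cdap_namespace".
    static void copyAcrossNamespaces(ExploreClient exploreClient, NamespaceId queryNamespace,
                                     String sourceNamespace, String sourceTable,
                                     String targetTable) throws Exception {
        String query = String.format("insert into table %s select * from cdap_%s.%s",
                                     targetTable, sourceNamespace, sourceTable);
        // block until the insert completes, then release the result's resources
        exploreClient.submit(queryNamespace, query).get().close();
    }
}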

Example 25 with ExploreExecutionResult

use of co.cask.cdap.explore.client.ExploreExecutionResult in project cdap by caskdata.

the class GetStreamStatsCommand method perform.

@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
    long currentTime = System.currentTimeMillis();
    StreamId streamId = cliConfig.getCurrentNamespace().stream(arguments.get(ArgumentName.STREAM.toString()));
    // clamp the limit to [1, MAX_LIMIT]
    Integer limitInput = arguments.getIntOptional(ArgumentName.LIMIT.toString(), DEFAULT_LIMIT);
    // we know we're passing a non-null default, so limitInput should never be null.
    Preconditions.checkNotNull(limitInput);
    int limit = Math.max(1, Math.min(MAX_LIMIT, limitInput));
    long startTime = getTimestamp(arguments.getOptional(ArgumentName.START_TIME.toString(), "min"), currentTime);
    long endTime = getTimestamp(arguments.getOptional(ArgumentName.END_TIME.toString(), "max"), currentTime);
    // hack to validate streamId
    StreamProperties config = streamClient.getConfig(streamId);
    if (config.getFormat().getName().equals("text")) {
        output.printf("No schema found for stream '%s'", streamId.getEntityName());
        output.println();
        return;
    }
    // build processorMap: Hive column name -> StatsProcessor
    Map<String, Set<StatsProcessor>> processorMap = new HashMap<>();
    Schema streamSchema = config.getFormat().getSchema();
    for (Schema.Field field : streamSchema.getFields()) {
        Schema fieldSchema = field.getSchema();
        String hiveColumnName = cdapSchemaColumName2HiveColumnName(streamId, field.getName());
        processorMap.put(hiveColumnName, getProcessorsForType(fieldSchema.getType(), fieldSchema.getUnionSchemas()));
    }
    // fetch a batch of stream events and calculate statistics about them
    String timestampCol = getTimestampHiveColumn(streamId);
    ListenableFuture<ExploreExecutionResult> resultsFuture = queryClient.execute(
        streamId.getParent(),
        "SELECT * FROM " + getHiveTableName(streamId) +
            " WHERE " + timestampCol + " BETWEEN " + startTime + " AND " + endTime +
            " LIMIT " + limit);
    ExploreExecutionResult results = resultsFuture.get(1, TimeUnit.MINUTES);
    List<ColumnDesc> schema = results.getResultSchema();
    // apply StatsProcessors to every element in every row
    int rows = 0;
    while (results.hasNext()) {
        rows++;
        QueryResult row = results.next();
        for (int i = 0; i < row.getColumns().size(); i++) {
            Object column = row.getColumns().get(i);
            ColumnDesc columnDesc = schema.get(i);
            String columnName = columnDesc.getName();
            if (isUserHiveColumn(streamId, columnName)) {
                Set<StatsProcessor> processors = processorMap.get(columnName);
                if (processors != null) {
                    for (StatsProcessor processor : processors) {
                        processor.process(column);
                    }
                }
            }
        }
    }
    // print report
    for (ColumnDesc columnDesc : schema) {
        if (isUserHiveColumn(streamId, columnDesc.getName())) {
            String truncatedColumnName = getTruncatedColumnName(streamId, columnDesc.getName());
            output.printf("column: %s, type: %s", truncatedColumnName, columnDesc.getType());
            output.println();
            Set<StatsProcessor> processors = processorMap.get(columnDesc.getName());
            if (processors != null && !processors.isEmpty()) {
                for (StatsProcessor processor : processors) {
                    processor.printReport(output);
                }
                output.println();
            } else {
                output.println("No statistics available");
                output.println();
            }
        }
    }
    output.printf("Analyzed %d Stream events in the time range [%d, %d]...", rows, startTime, endTime);
    output.println();
    output.println();
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashMap(java.util.HashMap) Schema(co.cask.cdap.api.data.schema.Schema) StreamProperties(co.cask.cdap.proto.StreamProperties) ColumnDesc(co.cask.cdap.proto.ColumnDesc) QueryResult(co.cask.cdap.proto.QueryResult) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult)
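StatsProcessor is internal to the CLI; the loops above rely on just two methods, process(Object) for each cell and printReport(PrintStream) at the end. An illustrative reconstruction under that assumption; neither this interface sketch nor MinMaxProcessor is the actual CDAP class:

import java.io.PrintStream;

// Illustrative reconstruction of the two methods the loops above rely on.
interface StatsProcessor {
    void process(Object element);
    void printReport(PrintStream output);
}

// Hypothetical processor tracking the minimum and maximum of a numeric column.
final class MinMaxProcessor implements StatsProcessor {
    private double min = Double.POSITIVE_INFINITY;
    private double max = Double.NEGATIVE_INFINITY;
    private boolean seen;

    @Override
    public void process(Object element) {
        if (element instanceof Number) {            // non-numeric cells are ignored
            double value = ((Number) element).doubleValue();
            min = Math.min(min, value);
            max = Math.max(max, value);
            seen = true;
        }
    }

    @Override
    public void printReport(PrintStream output) {
        if (seen) {
            output.printf("min: %s, max: %s%n", min, max);
        } else {
            output.println("no numeric values seen");
        }
    }
}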

Aggregations

ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult): 25
Test (org.junit.Test): 16
ColumnDesc (co.cask.cdap.proto.ColumnDesc): 13
QueryResult (co.cask.cdap.proto.QueryResult): 9
DatasetId (co.cask.cdap.proto.id.DatasetId): 7
Transaction (org.apache.tephra.Transaction): 5
Schema (co.cask.cdap.api.data.schema.Schema): 4
Table (co.cask.cdap.api.dataset.table.Table): 3
NamespaceId (co.cask.cdap.proto.id.NamespaceId): 3
StreamId (co.cask.cdap.proto.id.StreamId): 3
FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 2
KeyExtendedStructValueTableDefinition (co.cask.cdap.explore.service.datasets.KeyExtendedStructValueTableDefinition): 2
KeyStructValueTableDefinition (co.cask.cdap.explore.service.datasets.KeyStructValueTableDefinition): 2
WritableKeyStructValueTableDefinition (co.cask.cdap.explore.service.datasets.WritableKeyStructValueTableDefinition): 2
NamespaceMeta (co.cask.cdap.proto.NamespaceMeta): 2
QueryStatus (co.cask.cdap.proto.QueryStatus): 2
StreamProperties (co.cask.cdap.proto.StreamProperties): 2
Put (co.cask.cdap.api.dataset.table.Put): 1
RowMaker (co.cask.cdap.cli.util.RowMaker): 1
Table (co.cask.cdap.cli.util.table.Table): 1