Search in sources:

Example 21 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class HiveExploreStructuredRecordTestRun method testSelectStar.

/**
 * Issues {@code select *} against MY_TABLE_NAME and checks the reported schema, the values of
 * the first row, and that no further row follows it.
 */
@Test
public void testSelectStar() throws Exception {
    // all four columns are expected as STRING, named "<table>.<column>", positions 1..4
    List<ColumnDesc> expectedSchema = Lists.newArrayList(
        new ColumnDesc(MY_TABLE_NAME + ".id", "STRING", 1, null),
        new ColumnDesc(MY_TABLE_NAME + ".subject", "STRING", 2, null),
        new ColumnDesc(MY_TABLE_NAME + ".body", "STRING", 3, null),
        new ColumnDesc(MY_TABLE_NAME + ".sender", "STRING", 4, null));

    String query = "select * from " + MY_TABLE_NAME;
    ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, query).get();

    // check schema
    Assert.assertEquals(expectedSchema, results.getResultSchema());

    // check results, column by column, in schema order
    List<Object> row = results.next().getColumns();
    Object[] expectedValues = { "email1", "this is the subject", "this is the body", "sljackson@boss.com" };
    for (int i = 0; i < expectedValues.length; i++) {
        Assert.assertEquals(expectedValues[i], row.get(i));
    }

    // should not be any more
    Assert.assertFalse(results.hasNext());
}
Also used : ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)

Example 22 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class ExecuteQueryCommand method perform.

/**
 * Runs the SQL query supplied in the arguments and prints its result set as a table.
 *
 * <p>The query is executed in the CLI's current namespace and awaited for at most the optional
 * TIMEOUT argument, in minutes ({@code DEFAULT_TIMEOUT_MIN} when absent). A statement that
 * cannot contain results only produces an informational message. When the wait times out, a
 * message is printed and the method returns normally. When the waiting thread is interrupted,
 * the interrupt flag is restored and the method returns without output.
 *
 * @param arguments command arguments; QUERY is read, TIMEOUT is optional
 * @param output stream receiving the rendered table and any status messages
 * @throws SQLException if the final query status is not FINISHED, or if execution failed with a
 *     root cause other than {@code HandleNotFoundException}
 * @throws RuntimeException if the query future was cancelled; a
 *     {@code HandleNotFoundException} root cause is rethrown through {@code Throwables.propagate}
 */
@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
    String query = arguments.get(ArgumentName.QUERY.toString());
    long timeOutMins = arguments.getLongOptional(ArgumentName.TIMEOUT.toString(), DEFAULT_TIMEOUT_MIN);
    ListenableFuture<ExploreExecutionResult> future = queryClient.execute(cliConfig.getCurrentNamespace(), query);
    try {
        ExploreExecutionResult executionResult = future.get(timeOutMins, TimeUnit.MINUTES);
        String[] header;
        List<QueryResult> rows;
        try {
            if (!executionResult.canContainResults()) {
                output.println("SQL statement does not output any result.");
                return;
            }
            List<ColumnDesc> schema = executionResult.getResultSchema();
            header = new String[schema.size()];
            for (ColumnDesc column : schema) {
                // Hive columns start at 1
                header[column.getPosition() - 1] = column.getName() + ": " + column.getType();
            }
            // drain every row before the result is closed
            rows = Lists.newArrayList(executionResult);
        } finally {
            // close on every path, including failures while reading the schema or the rows
            executionResult.close();
        }
        QueryStatus.OpStatus opStatus = executionResult.getStatus().getStatus();
        if (opStatus != QueryStatus.OpStatus.FINISHED) {
            throw new SQLException(String.format("Query '%s' execution did not finish successfully. " + "Got final state - %s", query, opStatus));
        }
        Table table = Table.builder().setHeader(header).setRows(rows, new RowMaker<QueryResult>() {

            @Override
            public List<?> makeRow(QueryResult object) {
                return object.getColumns();
            }
        }).build();
        cliConfig.getTableRenderer().render(cliConfig, output, table);
        output.printf("Fetched %d rows", rows.size()).println();
    } catch (InterruptedException e) {
        // restore the interrupt flag so callers further up can react to it
        Thread.currentThread().interrupt();
    } catch (ExecutionException e) {
        Throwable rootCause = Throwables.getRootCause(e);
        if (rootCause instanceof HandleNotFoundException) {
            throw Throwables.propagate(rootCause);
        }
        throw new SQLException(rootCause);
    } catch (CancellationException e) {
        // keep the original exception as the cause so the cancellation site stays visible
        throw new RuntimeException("Query has been cancelled on ListenableFuture object.", e);
    } catch (TimeoutException e) {
        output.println("Couldn't obtain results after " + timeOutMins + "mins.");
    }
}
Also used : Table(co.cask.cdap.cli.util.table.Table) SQLException(java.sql.SQLException) RowMaker(co.cask.cdap.cli.util.RowMaker) ColumnDesc(co.cask.cdap.proto.ColumnDesc) QueryStatus(co.cask.cdap.proto.QueryStatus) HandleNotFoundException(co.cask.cdap.explore.service.HandleNotFoundException) QueryResult(co.cask.cdap.proto.QueryResult) CancellationException(java.util.concurrent.CancellationException) ExecutionException(java.util.concurrent.ExecutionException) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) TimeoutException(java.util.concurrent.TimeoutException)

Example 23 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class HiveExploreTableTestRun method testInsertFromJoin.

/**
 * Checks that a Hive {@code insert into ... select} whose source is a join of two Table datasets
 * ("purchases" joined with "users") populates a third Table dataset ("expanded"), by reading the
 * expanded table back through a second query.
 */
@Test
public void testInsertFromJoin() throws Exception {
    DatasetId userTableID = NAMESPACE_ID.dataset("users");
    DatasetId purchaseTableID = NAMESPACE_ID.dataset("purchases");
    DatasetId expandedTableID = NAMESPACE_ID.dataset("expanded");
    // expandedSchema is purchaseSchema plus the user's name (as "username") and email
    Schema userSchema = Schema.recordOf("user", Schema.Field.of("id", Schema.of(Schema.Type.STRING)), Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("email", Schema.of(Schema.Type.STRING)));
    Schema purchaseSchema = Schema.recordOf("purchase", Schema.Field.of("purchaseid", Schema.of(Schema.Type.LONG)), Schema.Field.of("itemid", Schema.of(Schema.Type.STRING)), Schema.Field.of("userid", Schema.of(Schema.Type.STRING)), Schema.Field.of("ct", Schema.of(Schema.Type.INT)), Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)));
    Schema expandedSchema = Schema.recordOf("expandedPurchase", Schema.Field.of("purchaseid", Schema.of(Schema.Type.LONG)), Schema.Field.of("itemid", Schema.of(Schema.Type.STRING)), Schema.Field.of("userid", Schema.of(Schema.Type.STRING)), Schema.Field.of("ct", Schema.of(Schema.Type.INT)), Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)), Schema.Field.of("username", Schema.of(Schema.Type.STRING)), Schema.Field.of("email", Schema.of(Schema.Type.STRING)));
    // create the three Table datasets; the row field is "id" for users and "purchaseid" for both purchase tables
    datasetFramework.addInstance(Table.class.getName(), userTableID, TableProperties.builder().setSchema(userSchema).setRowFieldName("id").build());
    datasetFramework.addInstance(Table.class.getName(), purchaseTableID, TableProperties.builder().setSchema(purchaseSchema).setRowFieldName("purchaseid").build());
    datasetFramework.addInstance(Table.class.getName(), expandedTableID, TableProperties.builder().setSchema(expandedSchema).setRowFieldName("purchaseid").build());
    // only the two source tables are written to directly; no handle is fetched for the expanded table here
    Table userTable = datasetFramework.getDataset(userTableID, DatasetDefinition.NO_ARGUMENTS, null);
    Table purchaseTable = datasetFramework.getDataset(purchaseTableID, DatasetDefinition.NO_ARGUMENTS, null);
    TransactionAware txUserTable = (TransactionAware) userTable;
    TransactionAware txPurchaseTable = (TransactionAware) purchaseTable;
    // both tables take part in the same short transaction
    Transaction tx1 = transactionManager.startShort(100);
    txUserTable.startTx(tx1);
    txPurchaseTable.startTx(tx1);
    // one user row keyed "samuel" (presumably surfaced as the "id" column via the row field name)
    Put put = new Put(Bytes.toBytes("samuel"));
    put.add("name", "Samuel Jackson");
    put.add("email", "sjackson@gmail.com");
    userTable.put(put);
    // one purchase row keyed 1L whose userid matches the user row key above
    put = new Put(Bytes.toBytes(1L));
    put.add("userid", "samuel");
    put.add("itemid", "scotch");
    put.add("ct", 1);
    put.add("price", 56.99d);
    purchaseTable.put(put);
    // NOTE(review): commitTx() runs on both tables before canCommit()/commit() on the
    // transaction manager; the order is kept exactly as written — confirm it is intentional
    txUserTable.commitTx();
    txPurchaseTable.commitTx();
    // hand the combined change set of both tables to the transaction manager
    List<byte[]> changes = new ArrayList<>();
    changes.addAll(txUserTable.getTxChanges());
    changes.addAll(txPurchaseTable.getTxChanges());
    transactionManager.canCommit(tx1.getTransactionId(), changes);
    transactionManager.commit(tx1.getTransactionId(), tx1.getWritePointer());
    txUserTable.postTxCommit();
    txPurchaseTable.postTxCommit();
    try {
        // join purchases (P) with users (U) on P.userid = U.id and insert the result into the expanded table
        String command = String.format("insert into table %s select P.purchaseid, P.itemid, P.userid, P.ct, P.price, U.name, U.email from " + "%s P join %s U on (P.userid = U.id)", getDatasetHiveName(expandedTableID), getDatasetHiveName(purchaseTableID), getDatasetHiveName(userTableID));
        ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
        Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
        // read the expanded table back; the expected column descriptors and the single expected joined row are passed to runCommand
        command = String.format("select purchaseid, itemid, userid, ct, price, username, email from %s", getDatasetHiveName(expandedTableID));
        runCommand(NAMESPACE_ID, command, true, Lists.newArrayList(new ColumnDesc("purchaseid", "BIGINT", 1, null), new ColumnDesc("itemid", "STRING", 2, null), new ColumnDesc("userid", "STRING", 3, null), new ColumnDesc("ct", "INT", 4, null), new ColumnDesc("price", "DOUBLE", 5, null), new ColumnDesc("username", "STRING", 6, null), new ColumnDesc("email", "STRING", 7, null)), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(1L, "scotch", "samuel", 1, 56.99d, "Samuel Jackson", "sjackson@gmail.com"))));
    } finally {
        // remove the datasets created by this test whether or not the assertions passed
        datasetFramework.deleteInstance(userTableID);
        datasetFramework.deleteInstance(purchaseTableID);
        datasetFramework.deleteInstance(expandedTableID);
    }
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) Table(co.cask.cdap.api.dataset.table.Table) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) Put(co.cask.cdap.api.dataset.table.Put) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 24 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class HiveExploreTableTestRun method testSelectStar.

/**
 * Runs {@code select *} on a table and verifies the result schema plus the first row, then
 * asserts that no further row is available.
 *
 * <p>Two layouts are supported: when {@code schema} equals SCHEMA the row starts with a boolean
 * column and carries double/bytes columns; otherwise the boolean column is absent, the double
 * column is expected as BINARY, the bytes column as STRING, and an extra null "new_field" is
 * expected.
 *
 * @param tableToQuery table name used in the query text
 * @param tableInSchema table name expected as prefix of every column name in the result schema
 * @param schema schema the table is expected to have; compared against SCHEMA
 */
private void testSelectStar(String tableToQuery, String tableInSchema, Schema schema) throws Exception {
    boolean originalSchema = schema.equals(SCHEMA);
    String prefix = tableInSchema + ".";

    List<ColumnDesc> expectedSchema;
    if (originalSchema) {
        expectedSchema = Lists.newArrayList(
            new ColumnDesc(prefix + "bool_field", "BOOLEAN", 1, null),
            new ColumnDesc(prefix + "int_field", "INT", 2, null),
            new ColumnDesc(prefix + "long_field", "BIGINT", 3, null),
            new ColumnDesc(prefix + "float_field", "FLOAT", 4, null),
            new ColumnDesc(prefix + "double_field", "DOUBLE", 5, null),
            new ColumnDesc(prefix + "bytes_field", "BINARY", 6, null),
            new ColumnDesc(prefix + "string_field", "STRING", 7, null));
    } else {
        expectedSchema = Lists.newArrayList(
            new ColumnDesc(prefix + "int_field", "INT", 1, null),
            new ColumnDesc(prefix + "long_field", "BIGINT", 2, null),
            new ColumnDesc(prefix + "float_field", "FLOAT", 3, null),
            new ColumnDesc(prefix + "double_field", "BINARY", 4, null),
            new ColumnDesc(prefix + "bytes_field", "STRING", 5, null),
            new ColumnDesc(prefix + "new_field", "STRING", 6, null),
            new ColumnDesc(prefix + "string_field", "STRING", 7, null));
    }

    ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from " + tableToQuery).get();

    // check SCHEMA
    Assert.assertEquals(expectedSchema, results.getResultSchema());

    // check record1; the cursor skips the boolean column when it is not part of the schema
    List<Object> row = results.next().getColumns();
    int pos = 0;
    if (originalSchema) {
        Assert.assertFalse((Boolean) row.get(pos));
        pos++;
    }
    Assert.assertEquals(Integer.MAX_VALUE, row.get(pos));
    pos++;
    Assert.assertEquals(Long.MAX_VALUE, row.get(pos));
    pos++;
    // why does this come back as a double when it's a float???
    double floatDelta = Math.abs(3.14f - (Double) row.get(pos));
    pos++;
    Assert.assertTrue(floatDelta < 0.000001);
    if (originalSchema) {
        double doubleDelta = Math.abs(3.14 - (Double) row.get(pos));
        pos++;
        Assert.assertTrue(doubleDelta < 0.000001);
        Assert.assertArrayEquals(new byte[] { 'A', 'B', 'C' }, (byte[]) row.get(pos));
        pos++;
    } else {
        Assert.assertArrayEquals(Bytes.toBytes(3.14D), (byte[]) row.get(pos));
        pos++;
        Assert.assertEquals("ABC", row.get(pos));
        pos++;
        Assert.assertNull(row.get(pos));
        pos++;
    }
    Assert.assertEquals("row1", row.get(pos));

    // should not be any more
    Assert.assertFalse(results.hasNext());
}
Also used : ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult)

Example 25 with ColumnDesc

use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

the class HiveExploreTableTestRun method testInsert.

/**
 * Copies {@code int_field} and {@code string_field} from MY_TABLE_NAME into a freshly created
 * Table dataset ("othertable") through a Hive insert, then queries that dataset and hands the
 * expected columns and row to {@code runCommand}. Both datasets are deleted afterwards.
 */
@Test
public void testInsert() throws Exception {
    setupTable(null, null);

    DatasetId otherTable = NAMESPACE_ID.dataset("othertable");
    Schema schema = Schema.recordOf(
        "record",
        Schema.Field.of("value", Schema.of(Schema.Type.INT)),
        Schema.Field.of("id", Schema.of(Schema.Type.STRING)));
    datasetFramework.addInstance(
        Table.class.getName(),
        otherTable,
        TableProperties.builder().setSchema(schema).setRowFieldName("id").build());

    try {
        // populate the other table from the source table
        String insertSql = String.format(
            "insert into %s select int_field, string_field from %s",
            getDatasetHiveName(otherTable),
            MY_TABLE_NAME);
        ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, insertSql).get();
        Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());

        // read it back: one row, id "row1" with value Integer.MAX_VALUE
        String selectSql = String.format("select id, value from %s", getDatasetHiveName(otherTable));
        runCommand(
            NAMESPACE_ID,
            selectSql,
            true,
            Lists.newArrayList(
                new ColumnDesc("id", "STRING", 1, null),
                new ColumnDesc("value", "INT", 2, null)),
            Lists.newArrayList(
                new QueryResult(Lists.<Object>newArrayList("row1", Integer.MAX_VALUE))));
    } finally {
        datasetFramework.deleteInstance(MY_TABLE);
        datasetFramework.deleteInstance(otherTable);
    }
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) ColumnDesc(co.cask.cdap.proto.ColumnDesc) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Aggregations

ColumnDesc (co.cask.cdap.proto.ColumnDesc)38 QueryResult (co.cask.cdap.proto.QueryResult)23 Test (org.junit.Test)21 DatasetId (co.cask.cdap.proto.id.DatasetId)14 ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult)13 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)8 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)7 SQLException (java.sql.SQLException)7 Location (org.apache.twill.filesystem.Location)7 FileSet (co.cask.cdap.api.dataset.lib.FileSet)6 Schema (co.cask.cdap.api.data.schema.Schema)4 ExploreClient (co.cask.cdap.explore.client.ExploreClient)4 MockExploreClient (co.cask.cdap.explore.client.MockExploreClient)4 QueryStatus (co.cask.cdap.proto.QueryStatus)4 StreamId (co.cask.cdap.proto.id.StreamId)4 ResultSet (java.sql.ResultSet)4 PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)3 PartitionedFileSetProperties (co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties)3 Table (co.cask.cdap.api.dataset.table.Table)3 QueryHandle (co.cask.cdap.proto.QueryHandle)3