Search in sources :

Example 36 with QueryResult

Use of co.cask.cdap.proto.QueryResult in the project cdap by caskdata.

The following example is taken from the class Hive12CDH5ExploreService, method doFetchNextResults.

@Override
protected List<QueryResult> doFetchNextResults(OperationHandle handle, FetchOrientation fetchOrientation, int size) throws Exception {
    // The CLIService.fetchResults signature differs across Hive versions, so the
    // Hive 1.2 / CDH 5 variant is resolved reflectively instead of being linked at compile time.
    Class<?> cliServiceClass = Class.forName("org.apache.hive.service.cli.CLIService");
    Method fetchResultsMethod = cliServiceClass.getMethod("fetchResults", OperationHandle.class, FetchOrientation.class, Long.TYPE);
    // The int 'size' is auto-unwrapped and widened to long by Method.invoke.
    Object rawRowSet = fetchResultsMethod.invoke(getCliService(), handle, fetchOrientation, size);
    // Convert the opaque RowSet into its Thrift representation, again via reflection.
    Class<?> rowSetClass = Class.forName("org.apache.hive.service.cli.RowSet");
    Method toTRowSetMethod = rowSetClass.getMethod("toTRowSet");
    TRowSet tRowSet = (TRowSet) toTRowSetMethod.invoke(rawRowSet);
    // Translate each Thrift row into a QueryResult of plain Java objects.
    ImmutableList.Builder<QueryResult> results = ImmutableList.builder();
    for (TRow row : tRowSet.getRows()) {
        List<Object> columns = Lists.newArrayList();
        for (TColumnValue value : row.getColVals()) {
            columns.add(HiveUtilities.tColumnToObject(value));
        }
        results.add(new QueryResult(columns));
    }
    return results.build();
}
Also used : TRowSet(org.apache.hive.service.cli.thrift.TRowSet) TRow(org.apache.hive.service.cli.thrift.TRow) QueryResult(co.cask.cdap.proto.QueryResult) ImmutableList(com.google.common.collect.ImmutableList) Method(java.lang.reflect.Method) TColumnValue(org.apache.hive.service.cli.thrift.TColumnValue)

Example 37 with QueryResult

Use of co.cask.cdap.proto.QueryResult in the project cdap by caskdata.

The following example is taken from the class Hive14ExploreService, method doFetchNextResults.

@Override
protected List<QueryResult> doFetchNextResults(OperationHandle handle, FetchOrientation fetchOrientation, int size) throws Exception {
    // Hive 1.4+ exposes a fetchResults overload with an explicit FetchType,
    // so the call can be made directly; ask for the query's output rows.
    RowSet fetched = getCliService().fetchResults(handle, fetchOrientation, size, FetchType.QUERY_OUTPUT);
    ImmutableList.Builder<QueryResult> results = ImmutableList.builder();
    // Wrap each raw row (an Object[]) in a QueryResult backed by a mutable list.
    for (Object[] rawRow : fetched) {
        results.add(new QueryResult(Lists.newArrayList(rawRow)));
    }
    return results.build();
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) ImmutableList(com.google.common.collect.ImmutableList) RowSet(org.apache.hive.service.cli.RowSet)

Example 38 with QueryResult

Use of co.cask.cdap.proto.QueryResult in the project cdap by caskdata.

The following example is taken from the class GetStreamStatsCommand, method perform.

@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
    // Computes per-column statistics over stream events in a time window by
    // querying the stream's Hive table, then prints a report to 'output'.
    long currentTime = System.currentTimeMillis();
    StreamId streamId = cliConfig.getCurrentNamespace().stream(arguments.get(ArgumentName.STREAM.toString()));
    // limit limit to [1, MAX_LIMIT]
    Integer limitInput = arguments.getIntOptional(ArgumentName.LIMIT.toString(), DEFAULT_LIMIT);
    // we know we're passing a non-null default, so limitInput should never be null.
    Preconditions.checkNotNull(limitInput);
    int limit = Math.max(1, Math.min(MAX_LIMIT, limitInput));
    // "min"/"max" defaults let getTimestamp resolve open-ended ranges relative to currentTime.
    long startTime = getTimestamp(arguments.getOptional(ArgumentName.START_TIME.toString(), "min"), currentTime);
    long endTime = getTimestamp(arguments.getOptional(ArgumentName.END_TIME.toString(), "max"), currentTime);
    // hack to validate streamId
    StreamProperties config = streamClient.getConfig(streamId);
    // Text-formatted streams have no structured schema, so there is nothing to compute stats over.
    if (config.getFormat().getName().equals("text")) {
        output.printf("No schema found for stream '%s'", streamId.getEntityName());
        output.println();
        return;
    }
    // build processorMap: Hive column name -> StatsProcessor
    Map<String, Set<StatsProcessor>> processorMap = new HashMap<>();
    Schema streamSchema = config.getFormat().getSchema();
    for (Schema.Field field : streamSchema.getFields()) {
        Schema fieldSchema = field.getSchema();
        // CDAP field names are translated to their Hive column equivalents for lookup later.
        String hiveColumnName = cdapSchemaColumName2HiveColumnName(streamId, field.getName());
        processorMap.put(hiveColumnName, getProcessorsForType(fieldSchema.getType(), fieldSchema.getUnionSchemas()));
    }
    // get a list of stream events and calculates various statistics about the events
    String timestampCol = getTimestampHiveColumn(streamId);
    ListenableFuture<ExploreExecutionResult> resultsFuture = queryClient.execute(streamId.getParent(), "SELECT * FROM " + getHiveTableName(streamId) + " WHERE " + timestampCol + " BETWEEN " + startTime + " AND " + endTime + " LIMIT " + limit);
    // Bounded wait so the CLI command cannot hang indefinitely on a slow query.
    ExploreExecutionResult results = resultsFuture.get(1, TimeUnit.MINUTES);
    // NOTE(review): 'results' does not appear to be closed here — if ExploreExecutionResult
    // holds server-side resources, consider try-with-resources; verify against its contract.
    List<ColumnDesc> schema = results.getResultSchema();
    // apply StatsProcessors to every element in every row
    int rows = 0;
    while (results.hasNext()) {
        rows++;
        QueryResult row = results.next();
        for (int i = 0; i < row.getColumns().size(); i++) {
            Object column = row.getColumns().get(i);
            ColumnDesc columnDesc = schema.get(i);
            String columnName = columnDesc.getName();
            // Skip system columns (e.g. the timestamp column); only user-defined columns get stats.
            if (isUserHiveColumn(streamId, columnName)) {
                Set<StatsProcessor> processors = processorMap.get(columnName);
                if (processors != null) {
                    for (StatsProcessor processor : processors) {
                        processor.process(column);
                    }
                }
            }
        }
    }
    // print report
    for (ColumnDesc columnDesc : schema) {
        if (isUserHiveColumn(streamId, columnDesc.getName())) {
            String truncatedColumnName = getTruncatedColumnName(streamId, columnDesc.getName());
            output.printf("column: %s, type: %s", truncatedColumnName, columnDesc.getType());
            output.println();
            Set<StatsProcessor> processors = processorMap.get(columnDesc.getName());
            if (processors != null && !processors.isEmpty()) {
                for (StatsProcessor processor : processors) {
                    processor.printReport(output);
                }
                output.println();
            } else {
                // Column type had no applicable StatsProcessors (empty set from getProcessorsForType).
                output.println("No statistics available");
                output.println();
            }
        }
    }
    output.printf("Analyzed %d Stream events in the time range [%d, %d]...", rows, startTime, endTime);
    output.println();
    output.println();
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashMap(java.util.HashMap) Schema(co.cask.cdap.api.data.schema.Schema) StreamProperties(co.cask.cdap.proto.StreamProperties) ColumnDesc(co.cask.cdap.proto.ColumnDesc) QueryResult(co.cask.cdap.proto.QueryResult) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult)

Example 39 with QueryResult

Use of co.cask.cdap.proto.QueryResult in the project cdap by caskdata.

The following example is taken from the class QueryClientTest, method executeBasicQuery.

private void executeBasicQuery(NamespaceId namespace, String instanceName) throws Exception {
    // Hive replaces the periods with underscores
    String query = "select * from dataset_" + instanceName.replace(".", "_");
    ExploreExecutionResult execResult = queryClient.execute(namespace, query).get();
    Assert.assertNotNull(execResult.getResultSchema());
    // Materialize the iterator so row count and contents can be checked.
    List<QueryResult> rows = Lists.newArrayList(execResult);
    Assert.assertNotNull(rows);
    Assert.assertEquals(2, rows.size());
    List<Object> firstRow = rows.get(0).getColumns();
    List<Object> secondRow = rows.get(1).getColumns();
    // Column values come back as byte[] and must be decoded before comparison.
    Assert.assertEquals("bob", Bytes.toString((byte[]) firstRow.get(0)));
    Assert.assertEquals("123", Bytes.toString((byte[]) firstRow.get(1)));
    Assert.assertEquals("joe", Bytes.toString((byte[]) secondRow.get(0)));
    Assert.assertEquals("321", Bytes.toString((byte[]) secondRow.get(1)));
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) ExploreExecutionResult(co.cask.cdap.explore.client.ExploreExecutionResult)

Aggregations

QueryResult (co.cask.cdap.proto.QueryResult)39 ColumnDesc (co.cask.cdap.proto.ColumnDesc)23 Test (org.junit.Test)18 DatasetId (co.cask.cdap.proto.id.DatasetId)16 ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult)9 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)8 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)7 Location (org.apache.twill.filesystem.Location)7 FileSet (co.cask.cdap.api.dataset.lib.FileSet)6 ImmutableList (com.google.common.collect.ImmutableList)6 SQLException (java.sql.SQLException)6 HandleNotFoundException (co.cask.cdap.explore.service.HandleNotFoundException)5 QueryHandle (co.cask.cdap.proto.QueryHandle)4 StreamId (co.cask.cdap.proto.id.StreamId)4 Schema (co.cask.cdap.api.data.schema.Schema)3 PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)3 PartitionedFileSetProperties (co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties)3 Table (co.cask.cdap.api.dataset.table.Table)3 ExploreClient (co.cask.cdap.explore.client.ExploreClient)3 MockExploreClient (co.cask.cdap.explore.client.MockExploreClient)3