Example 36 with ColumnDesc

Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

The class QueryResultsBodyProducer, method getCSVHeaders.

private String getCSVHeaders(List<ColumnDesc> schema) throws HandleNotFoundException, SQLException, ExploreException {
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (ColumnDesc columnDesc : schema) {
        // Emit a comma before every column name except the first.
        if (first) {
            first = false;
        } else {
            sb.append(',');
        }
        sb.append(columnDesc.getName());
    }
    return sb.toString();
}
Also used: ColumnDesc (co.cask.cdap.proto.ColumnDesc)
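
The first/rest flag in getCSVHeaders is a manual comma join over the column names. As a point of comparison, here is a minimal standalone sketch of the same logic; the class name and the sample column names are hypothetical, standing in for real ColumnDesc.getName() values:

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class CsvHeaderSketch {
    public static void main(String[] args) {
        // Hypothetical column names, standing in for ColumnDesc.getName() results.
        List<String> columnNames = Arrays.asList("ts", "headers", "body");
        // Equivalent to the first/rest loop above: join the names with commas.
        String csvHeader = columnNames.stream().collect(Collectors.joining(","));
        System.out.println(csvHeader); // prints: ts,headers,body
    }
}

String.join(",", columnNames) would be equivalent here; the explicit loop in the CDAP code simply avoids extracting the names into a separate collection first.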

Example 37 with ColumnDesc

Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

The class ExploreResultSet, method getMetaData.

@Override
public ResultSetMetaData getMetaData() throws SQLException {
    if (isClosed()) {
        throw new SQLException("ResultSet is closed");
    }
    // Build the metadata lazily, on first request, and cache it.
    if (metaData == null) {
        try {
            List<ColumnDesc> columnDescs = executionResult.getResultSchema();
            metaData = new ExploreResultSetMetaData(columnDescs);
        } catch (ExploreException e) {
            LOG.error("Caught exception", e);
            // JDBC callers can only handle SQLException, so wrap the cause.
            throw new SQLException(e);
        }
    }
    return metaData;
}
Also used: SQLException (java.sql.SQLException), ColumnDesc (co.cask.cdap.proto.ColumnDesc), ExploreException (co.cask.cdap.explore.service.ExploreException)
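
Two JDBC conventions are visible here: the metadata is built lazily on first access and cached, and the backend's checked ExploreException is wrapped in SQLException, the only exception type a JDBC caller expects. A self-contained sketch of the same pattern, with hypothetical stand-in types (Metadata, BackendException) in place of the real ExploreResultSetMetaData and ExploreException:

import java.sql.SQLException;

public class LazyMetadataSketch {
    // Hypothetical stand-ins for ExploreResultSetMetaData and ExploreException.
    static class Metadata {}
    static class BackendException extends Exception {}

    private Metadata metaData;
    private boolean closed;

    public Metadata getMetaData() throws SQLException {
        if (closed) {
            throw new SQLException("ResultSet is closed");
        }
        if (metaData == null) {
            try {
                // Potentially expensive backend call, performed only once.
                metaData = fetchSchema();
            } catch (BackendException e) {
                // Translate the backend failure into the JDBC exception type.
                throw new SQLException(e);
            }
        }
        return metaData;
    }

    private Metadata fetchSchema() throws BackendException {
        return new Metadata();
    }

    public static void main(String[] args) throws SQLException {
        System.out.println(new LazyMetadataSketch().getMetaData()); // cached after the first call
    }
}

Caching is worthwhile because generic JDBC tooling may call getMetaData() repeatedly while iterating a result set, and the schema cannot change for the lifetime of the result.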

Example 38 with ColumnDesc

Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.

The class GetStreamStatsCommand, method perform.

@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
    long currentTime = System.currentTimeMillis();
    StreamId streamId = cliConfig.getCurrentNamespace().stream(arguments.get(ArgumentName.STREAM.toString()));
    // clamp limit to [1, MAX_LIMIT]
    Integer limitInput = arguments.getIntOptional(ArgumentName.LIMIT.toString(), DEFAULT_LIMIT);
    // we know we're passing a non-null default, so limitInput should never be null.
    Preconditions.checkNotNull(limitInput);
    int limit = Math.max(1, Math.min(MAX_LIMIT, limitInput));
    long startTime = getTimestamp(arguments.getOptional(ArgumentName.START_TIME.toString(), "min"), currentTime);
    long endTime = getTimestamp(arguments.getOptional(ArgumentName.END_TIME.toString(), "max"), currentTime);
    // hack to validate streamId
    StreamProperties config = streamClient.getConfig(streamId);
    if (config.getFormat().getName().equals("text")) {
        output.printf("No schema found for stream '%s'", streamId.getEntityName());
        output.println();
        return;
    }
    // build processorMap: Hive column name -> StatsProcessor
    Map<String, Set<StatsProcessor>> processorMap = new HashMap<>();
    Schema streamSchema = config.getFormat().getSchema();
    for (Schema.Field field : streamSchema.getFields()) {
        Schema fieldSchema = field.getSchema();
        String hiveColumnName = cdapSchemaColumName2HiveColumnName(streamId, field.getName());
        processorMap.put(hiveColumnName, getProcessorsForType(fieldSchema.getType(), fieldSchema.getUnionSchemas()));
    }
    // get a list of stream events and calculate various statistics about the events
    String timestampCol = getTimestampHiveColumn(streamId);
    ListenableFuture<ExploreExecutionResult> resultsFuture = queryClient.execute(streamId.getParent(), "SELECT * FROM " + getHiveTableName(streamId) + " WHERE " + timestampCol + " BETWEEN " + startTime + " AND " + endTime + " LIMIT " + limit);
    ExploreExecutionResult results = resultsFuture.get(1, TimeUnit.MINUTES);
    List<ColumnDesc> schema = results.getResultSchema();
    // apply StatsProcessors to every element in every row
    int rows = 0;
    while (results.hasNext()) {
        rows++;
        QueryResult row = results.next();
        for (int i = 0; i < row.getColumns().size(); i++) {
            Object column = row.getColumns().get(i);
            ColumnDesc columnDesc = schema.get(i);
            String columnName = columnDesc.getName();
            if (isUserHiveColumn(streamId, columnName)) {
                Set<StatsProcessor> processors = processorMap.get(columnName);
                if (processors != null) {
                    for (StatsProcessor processor : processors) {
                        processor.process(column);
                    }
                }
            }
        }
    }
    // print report
    for (ColumnDesc columnDesc : schema) {
        if (isUserHiveColumn(streamId, columnDesc.getName())) {
            String truncatedColumnName = getTruncatedColumnName(streamId, columnDesc.getName());
            output.printf("column: %s, type: %s", truncatedColumnName, columnDesc.getType());
            output.println();
            Set<StatsProcessor> processors = processorMap.get(columnDesc.getName());
            if (processors != null && !processors.isEmpty()) {
                for (StatsProcessor processor : processors) {
                    processor.printReport(output);
                }
                output.println();
            } else {
                output.println("No statistics available");
                output.println();
            }
        }
    }
    output.printf("Analyzed %d Stream events in the time range [%d, %d]...", rows, startTime, endTime);
    output.println();
    output.println();
}
Also used: StreamId (co.cask.cdap.proto.id.StreamId), ImmutableSet (com.google.common.collect.ImmutableSet), Set (java.util.Set), HashMap (java.util.HashMap), Schema (co.cask.cdap.api.data.schema.Schema), StreamProperties (co.cask.cdap.proto.StreamProperties), ColumnDesc (co.cask.cdap.proto.ColumnDesc), QueryResult (co.cask.cdap.proto.QueryResult), ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult)
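
Stripped of the CLI parsing and Hive plumbing, the statistics pass in perform is: build a map from column name to a set of processors, feed every cell of every row to the processors registered for its column, then ask each processor for a report. A compact sketch of that core loop follows; the StatsProcessor interface and CountNonNull processor are hypothetical simplifications, not the real CDAP types:

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ColumnStatsSketch {
    // Hypothetical, minimal version of the command's StatsProcessor.
    interface StatsProcessor {
        void process(Object value);
        void printReport();
    }

    static class CountNonNull implements StatsProcessor {
        private long count;
        public void process(Object value) { if (value != null) count++; }
        public void printReport() { System.out.println("non-null values: " + count); }
    }

    public static void main(String[] args) {
        // column name -> processors, mirroring processorMap in the command
        Map<String, Set<StatsProcessor>> processorMap = new HashMap<>();
        processorMap.put("body", Collections.<StatsProcessor>singleton(new CountNonNull()));

        List<String> schema = Arrays.asList("ts", "body");
        List<List<Object>> rows = Arrays.asList(
            Arrays.<Object>asList(1L, "hello"),
            Arrays.<Object>asList(2L, null));

        // Feed every cell to the processors registered for its column.
        for (List<Object> row : rows) {
            for (int i = 0; i < row.size(); i++) {
                Set<StatsProcessor> processors = processorMap.get(schema.get(i));
                if (processors != null) {
                    for (StatsProcessor processor : processors) {
                        processor.process(row.get(i));
                    }
                }
            }
        }
        processorMap.get("body").forEach(StatsProcessor::printReport); // non-null values: 1
    }
}

The null check on processorMap.get(...) mirrors the real command, which likewise skips columns that have no registered processors.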

Aggregations

ColumnDesc (co.cask.cdap.proto.ColumnDesc): 38
QueryResult (co.cask.cdap.proto.QueryResult): 23
Test (org.junit.Test): 21
DatasetId (co.cask.cdap.proto.id.DatasetId): 14
ExploreExecutionResult (co.cask.cdap.explore.client.ExploreExecutionResult): 13
TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 8
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 7
SQLException (java.sql.SQLException): 7
Location (org.apache.twill.filesystem.Location): 7
FileSet (co.cask.cdap.api.dataset.lib.FileSet): 6
Schema (co.cask.cdap.api.data.schema.Schema): 4
ExploreClient (co.cask.cdap.explore.client.ExploreClient): 4
MockExploreClient (co.cask.cdap.explore.client.MockExploreClient): 4
QueryStatus (co.cask.cdap.proto.QueryStatus): 4
StreamId (co.cask.cdap.proto.id.StreamId): 4
ResultSet (java.sql.ResultSet): 4
PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 3
PartitionedFileSetProperties (co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties): 3
Table (co.cask.cdap.api.dataset.table.Table): 3
QueryHandle (co.cask.cdap.proto.QueryHandle): 3