Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.
In the class QueryResultsBodyProducer, the method getCSVHeaders builds a comma-separated CSV header line from the result schema:
private String getCSVHeaders(List<ColumnDesc> schema)
    throws HandleNotFoundException, SQLException, ExploreException {
  StringBuilder sb = new StringBuilder();
  boolean first = true;
  for (ColumnDesc columnDesc : schema) {
    // prepend a comma before every column name except the first
    if (first) {
      first = false;
    } else {
      sb.append(',');
    }
    sb.append(columnDesc.getName());
  }
  return sb.toString();
}
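For illustration, a minimal driver showing what getCSVHeaders produces. This is a hypothetical sketch, not project code, and the ColumnDesc constructor arguments (name, type, position, comment) are an assumption:

import co.cask.cdap.proto.ColumnDesc;
import java.util.Arrays;
import java.util.List;

public class CsvHeaderSketch {
  public static void main(String[] args) {
    // assumed constructor: ColumnDesc(name, type, position, comment)
    List<ColumnDesc> schema = Arrays.asList(
        new ColumnDesc("ts", "BIGINT", 1, null),
        new ColumnDesc("body", "STRING", 2, null));
    // same comma-join pattern as getCSVHeaders above
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (ColumnDesc columnDesc : schema) {
      if (first) {
        first = false;
      } else {
        sb.append(',');
      }
      sb.append(columnDesc.getName());
    }
    System.out.println(sb); // prints: ts,body
  }
}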
Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.
In the class ExploreResultSet, the method getMetaData lazily builds JDBC metadata from the ColumnDesc schema:
@Override
public ResultSetMetaData getMetaData() throws SQLException {
  if (isClosed()) {
    throw new SQLException("ResultSet is closed");
  }
  // build the metadata lazily and cache it for subsequent calls
  if (metaData == null) {
    try {
      List<ColumnDesc> columnDescs = executionResult.getResultSchema();
      metaData = new ExploreResultSetMetaData(columnDescs);
    } catch (ExploreException e) {
      LOG.error("Caught exception", e);
      throw new SQLException(e);
    }
  }
  return metaData;
}
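Once getMetaData returns, callers can consume the schema through the standard java.sql.ResultSetMetaData API. A minimal consumption sketch, assuming an already-open ExploreResultSet referenced by a variable rs (the variable and its setup are hypothetical):

ResultSetMetaData md = rs.getMetaData();
// JDBC column indexes are 1-based
for (int i = 1; i <= md.getColumnCount(); i++) {
  System.out.println(md.getColumnName(i) + " : " + md.getColumnTypeName(i));
}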
Use of co.cask.cdap.proto.ColumnDesc in project cdap by caskdata.
In the class GetStreamStatsCommand, the method perform queries a stream's Hive table and computes per-column statistics:
@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
  long currentTime = System.currentTimeMillis();
  StreamId streamId = cliConfig.getCurrentNamespace().stream(arguments.get(ArgumentName.STREAM.toString()));
  // clamp the limit to the range [1, MAX_LIMIT]
  Integer limitInput = arguments.getIntOptional(ArgumentName.LIMIT.toString(), DEFAULT_LIMIT);
  // we know we're passing a non-null default, so limitInput should never be null
  Preconditions.checkNotNull(limitInput);
  int limit = Math.max(1, Math.min(MAX_LIMIT, limitInput));
  long startTime = getTimestamp(arguments.getOptional(ArgumentName.START_TIME.toString(), "min"), currentTime);
  long endTime = getTimestamp(arguments.getOptional(ArgumentName.END_TIME.toString(), "max"), currentTime);
  // hack to validate streamId
  StreamProperties config = streamClient.getConfig(streamId);
  if (config.getFormat().getName().equals("text")) {
    output.printf("No schema found for stream '%s'", streamId.getEntityName());
    output.println();
    return;
  }
  // build processorMap: Hive column name -> set of StatsProcessors for that column's type
  Map<String, Set<StatsProcessor>> processorMap = new HashMap<>();
  Schema streamSchema = config.getFormat().getSchema();
  for (Schema.Field field : streamSchema.getFields()) {
    Schema fieldSchema = field.getSchema();
    String hiveColumnName = cdapSchemaColumName2HiveColumnName(streamId, field.getName());
    processorMap.put(hiveColumnName, getProcessorsForType(fieldSchema.getType(), fieldSchema.getUnionSchemas()));
  }
  // fetch a window of stream events and compute statistics over them
  String timestampCol = getTimestampHiveColumn(streamId);
  ListenableFuture<ExploreExecutionResult> resultsFuture = queryClient.execute(
      streamId.getParent(),
      "SELECT * FROM " + getHiveTableName(streamId) +
          " WHERE " + timestampCol + " BETWEEN " + startTime + " AND " + endTime +
          " LIMIT " + limit);
  ExploreExecutionResult results = resultsFuture.get(1, TimeUnit.MINUTES);
  List<ColumnDesc> schema = results.getResultSchema();
  // apply the matching StatsProcessors to every element in every row
  int rows = 0;
  while (results.hasNext()) {
    rows++;
    QueryResult row = results.next();
    for (int i = 0; i < row.getColumns().size(); i++) {
      Object column = row.getColumns().get(i);
      ColumnDesc columnDesc = schema.get(i);
      String columnName = columnDesc.getName();
      if (isUserHiveColumn(streamId, columnName)) {
        Set<StatsProcessor> processors = processorMap.get(columnName);
        if (processors != null) {
          for (StatsProcessor processor : processors) {
            processor.process(column);
          }
        }
      }
    }
  }
  // print a report for each user column
  for (ColumnDesc columnDesc : schema) {
    if (isUserHiveColumn(streamId, columnDesc.getName())) {
      String truncatedColumnName = getTruncatedColumnName(streamId, columnDesc.getName());
      output.printf("column: %s, type: %s", truncatedColumnName, columnDesc.getType());
      output.println();
      Set<StatsProcessor> processors = processorMap.get(columnDesc.getName());
      if (processors != null && !processors.isEmpty()) {
        for (StatsProcessor processor : processors) {
          processor.printReport(output);
        }
        output.println();
      } else {
        output.println("No statistics available");
        output.println();
      }
    }
  }
  output.printf("Analyzed %d Stream events in the time range [%d, %d]...", rows, startTime, endTime);
  output.println();
  output.println();
}
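The core ColumnDesc idiom in perform above is pairing each value in a QueryResult with its schema entry by position. A stripped-down sketch of that pairing, assuming an ExploreExecutionResult named results obtained from a QueryClient (the variable is hypothetical):

List<ColumnDesc> schema = results.getResultSchema();
while (results.hasNext()) {
  QueryResult row = results.next();
  for (int i = 0; i < row.getColumns().size(); i++) {
    // the schema list is positionally aligned with the row's columns
    ColumnDesc desc = schema.get(i);
    Object value = row.getColumns().get(i);
    System.out.printf("%s (%s) = %s%n", desc.getName(), desc.getType(), value);
  }
}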