Use of io.druid.query.dimension.DimensionSpec in project hive by apache.
Class DruidSerDe, method inferSchema.
/* GroupBy query */
private void inferSchema(GroupByQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes) {
    // Timestamp column
    columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
    columnTypes.add(TypeInfoFactory.timestampTypeInfo);
    // Dimension columns
    for (DimensionSpec ds : query.getDimensions()) {
        columnNames.add(ds.getOutputName());
        columnTypes.add(TypeInfoFactory.stringTypeInfo);
    }
    // Aggregator columns
    for (AggregatorFactory af : query.getAggregatorSpecs()) {
        columnNames.add(af.getName());
        columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName()));
    }
    // Post-aggregator columns: the query does not declare their type, so they
    // are always mapped to float
    for (PostAggregator pa : query.getPostAggregatorSpecs()) {
        columnNames.add(pa.getName());
        columnTypes.add(TypeInfoFactory.floatTypeInfo);
    }
}
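For reference, here is a minimal, self-contained sketch of the name/type contract this method builds. It is not Hive code: the column names are invented, plain strings stand in for Hive's PrimitiveTypeInfo objects, and the only point is to show the ordering (timestamp, then dimensions, then aggregators, then post-aggregators) and the type each section contributes.

// Hypothetical sketch (not Hive code): the schema produced for a GroupBy
// query with one dimension, one long-typed aggregator, and one post-aggregator.
import java.util.ArrayList;
import java.util.List;

public class GroupBySchemaSketch {
    public static void main(String[] args) {
        List<String> columnNames = new ArrayList<>();
        List<String> columnTypes = new ArrayList<>();
        // 1) Timestamp column always comes first.
        columnNames.add("__time");      columnTypes.add("timestamp");
        // 2) One string column per dimension spec.
        columnNames.add("country");     columnTypes.add("string");
        // 3) Aggregators keep their Druid-reported type (e.g. longSum -> bigint).
        columnNames.add("total_added"); columnTypes.add("bigint");
        // 4) Post-aggregators are always mapped to float.
        columnNames.add("avg_added");   columnTypes.add("float");
        for (int i = 0; i < columnNames.size(); i++) {
            System.out.println(columnNames.get(i) + " : " + columnTypes.get(i));
        }
    }
}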
Use of io.druid.query.dimension.DimensionSpec in project hive by apache.
Class DruidSerDe, method inferSchema.
/* Select query */
private void inferSchema(SelectQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes, String address) throws SerDeException {
    // Timestamp column
    columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
    columnTypes.add(TypeInfoFactory.timestampTypeInfo);
    // Dimension columns
    for (DimensionSpec ds : query.getDimensions()) {
        columnNames.add(ds.getOutputName());
        columnTypes.add(TypeInfoFactory.stringTypeInfo);
    }
    // The type of the metric columns is not explicit in the query, so we need
    // to emit a segment metadata query to discover it
    SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder();
    builder.dataSource(query.getDataSource());
    builder.merge(true);
    builder.analysisTypes();
    SegmentMetadataQuery metadataQuery = builder.build();
    // Execute query in Druid
    SegmentAnalysis schemaInfo;
    try {
        schemaInfo = submitMetadataRequest(address, metadataQuery);
    } catch (IOException e) {
        throw new SerDeException(e);
    }
    if (schemaInfo == null) {
        throw new SerDeException("Connected to Druid but could not retrieve datasource information");
    }
    for (String metric : query.getMetrics()) {
        columnNames.add(metric);
        columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(schemaInfo.getColumns().get(metric).getType()));
    }
}
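The last loop is the interesting part: the metadata result maps each column to a Druid type string, and that string is translated to a Hive type. A minimal sketch of that lookup follows; a plain Map stands in for SegmentAnalysis.getColumns(), and the metric names and type strings are invented for illustration.

// Hypothetical sketch: resolving metric column types from a metadata result.
// The Map stands in for schemaInfo.getColumns(); names and types are made up.
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class MetricTypeLookupSketch {
    public static void main(String[] args) {
        Map<String, String> reportedTypes = new LinkedHashMap<>();
        reportedTypes.put("added", "LONG");
        reportedTypes.put("delta", "FLOAT");

        List<String> columnNames = new ArrayList<>();
        List<String> columnTypes = new ArrayList<>();
        for (Map.Entry<String, String> e : reportedTypes.entrySet()) {
            columnNames.add(e.getKey());
            // Stand-in for DruidSerDeUtils.convertDruidToHiveType(...).
            columnTypes.add("LONG".equals(e.getValue()) ? "bigint" : "float");
        }
        System.out.println(columnNames + " -> " + columnTypes);
    }
}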
Use of io.druid.query.dimension.DimensionSpec in project hive by apache.
Class DruidGroupByQueryRecordReader, method initDimensionTypes.
private void initDimensionTypes() throws IOException {
    // @TODO move this out of here to org.apache.hadoop.hive.druid.serde.DruidSerDe
    List<DimensionSpec> dimensionSpecList = ((GroupByQuery) query).getDimensions();
    List<DimensionSpec> extractionDimensionSpecList = dimensionSpecList.stream()
        .filter(dimensionSpec -> dimensionSpec instanceof ExtractionDimensionSpec)
        .collect(Collectors.toList());
    extractionDimensionSpecList.forEach(dimensionSpec -> {
        ExtractionDimensionSpec extractionDimensionSpec = (ExtractionDimensionSpec) dimensionSpec;
        if (extractionDimensionSpec.getExtractionFn() instanceof TimeFormatExtractionFn) {
            final TimeFormatExtractionFn timeFormatExtractionFn = (TimeFormatExtractionFn) extractionDimensionSpec.getExtractionFn();
            // Dimensions extracted with the default/ISO time format are read as
            // timestamps; any other format is read as an int-formatted time field
            if (timeFormatExtractionFn == null || timeFormatExtractionFn.getFormat().equals(ISO_TIME_FORMAT)) {
                timeExtractionFields.add(extractionDimensionSpec.getOutputName());
            } else {
                intFormattedTimeExtractionFields.add(extractionDimensionSpec.getOutputName());
            }
        }
    });
}
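A hypothetical, self-contained sketch of that classification: extractions whose format is null or ISO go into one field set, everything else into the other. The ISO format string below is an assumption standing in for the reader's ISO_TIME_FORMAT constant, and the output names are invented.

import java.util.ArrayList;
import java.util.List;

public class TimeExtractionSplitSketch {
    // Assumed value; stands in for the reader's ISO_TIME_FORMAT constant.
    static final String ISO_TIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";

    static final List<String> timeExtractionFields = new ArrayList<>();
    static final List<String> intFormattedTimeExtractionFields = new ArrayList<>();

    public static void main(String[] args) {
        classify("floor_day", ISO_TIME_FORMAT); // ISO format -> time field
        classify("extract_year", "yyyy");       // custom format -> int-formatted field
        System.out.println("time: " + timeExtractionFields);
        System.out.println("int-formatted: " + intFormattedTimeExtractionFields);
    }

    static void classify(String outputName, String format) {
        if (format == null || format.equals(ISO_TIME_FORMAT)) {
            timeExtractionFields.add(outputName);
        } else {
            intFormattedTimeExtractionFields.add(outputName);
        }
    }
}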
Use of io.druid.query.dimension.DimensionSpec in project hive by apache.
Class DruidGroupByQueryRecordReader, method next.
@Override
public boolean next(NullWritable key, DruidWritable value) {
    if (nextKeyValue()) {
        // Update value
        value.getValue().clear();
        // 1) The timestamp column
        value.getValue().put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis());
        // 2) The dimension columns
        for (int i = 0; i < query.getDimensions().size(); i++) {
            DimensionSpec ds = query.getDimensions().get(i);
            List<String> dims = current.getDimension(ds.getDimension());
            if (dims.size() == 0) {
                // NULL value for dimension
                value.getValue().put(ds.getOutputName(), null);
            } else {
                int pos = dims.size() - indexes[i] - 1;
                value.getValue().put(ds.getOutputName(), dims.get(pos));
            }
        }
        int counter = 0;
        // 3) The aggregation columns
        for (AggregatorFactory af : query.getAggregatorSpecs()) {
            switch (extractors[counter++]) {
                case FLOAT:
                    value.getValue().put(af.getName(), current.getFloatMetric(af.getName()));
                    break;
                case LONG:
                    value.getValue().put(af.getName(), current.getLongMetric(af.getName()));
                    break;
            }
        }
        // 4) The post-aggregation columns
        for (PostAggregator pa : query.getPostAggregatorSpecs()) {
            assert extractors[counter++] == Extract.FLOAT;
            value.getValue().put(pa.getName(), current.getFloatMetric(pa.getName()));
        }
        return true;
    }
    return false;
}
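The pos computation deserves a note: Druid dimensions can be multi-valued, and indexes[i] appears to track which value of dimension i the current output row should carry, counted from the end of the list as the reader steps through combinations. A tiny hypothetical sketch of that arithmetic, with invented values and an assumed countdown of the index:

import java.util.Arrays;
import java.util.List;

public class MultiValueDimensionSketch {
    public static void main(String[] args) {
        // Stand-in for current.getDimension(ds.getDimension()).
        List<String> dims = Arrays.asList("a", "b", "c");
        // Stand-in for indexes[i]; assumed to count down across output rows.
        for (int index = dims.size() - 1; index >= 0; index--) {
            int pos = dims.size() - index - 1;
            System.out.println("index=" + index + " -> " + dims.get(pos));
        }
        // Prints a, b, c: values are emitted front to back as the index counts down.
    }
}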
Use of io.druid.query.dimension.DimensionSpec in project hive by apache.
Class DruidGroupByQueryRecordReader, method getCurrentValue.
@Override
public DruidWritable getCurrentValue() throws IOException, InterruptedException {
    // Create new value
    DruidWritable value = new DruidWritable();
    // 1) The timestamp column
    value.getValue().put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis());
    // 2) The dimension columns
    for (int i = 0; i < query.getDimensions().size(); i++) {
        DimensionSpec ds = query.getDimensions().get(i);
        List<String> dims = current.getDimension(ds.getDimension());
        if (dims.size() == 0) {
            // NULL value for dimension
            value.getValue().put(ds.getOutputName(), null);
        } else {
            int pos = dims.size() - indexes[i] - 1;
            value.getValue().put(ds.getOutputName(), dims.get(pos));
        }
    }
    int counter = 0;
    // 3) The aggregation columns
    for (AggregatorFactory af : query.getAggregatorSpecs()) {
        switch (extractors[counter++]) {
            case FLOAT:
                value.getValue().put(af.getName(), current.getFloatMetric(af.getName()));
                break;
            case LONG:
                value.getValue().put(af.getName(), current.getLongMetric(af.getName()));
                break;
        }
    }
    // 4) The post-aggregation columns
    for (PostAggregator pa : query.getPostAggregatorSpecs()) {
        assert extractors[counter++] == Extract.FLOAT;
        value.getValue().put(pa.getName(), current.getFloatMetric(pa.getName()));
    }
    return value;
}
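Both next() and getCurrentValue() rely on a precomputed extractors array, one slot per aggregator in aggregator order, to decide whether each metric is read as a float or a long. A self-contained sketch of that dispatch, with invented metric names and a plain Map standing in for the DruidWritable value:

import java.util.LinkedHashMap;
import java.util.Map;

public class ExtractorDispatchSketch {
    enum Extract { FLOAT, LONG }

    public static void main(String[] args) {
        // One extractor per aggregator, in aggregator order (names are made up).
        String[] metricNames = { "delta", "added" };
        Extract[] extractors = { Extract.FLOAT, Extract.LONG };
        Map<String, Object> value = new LinkedHashMap<>();
        for (int i = 0; i < metricNames.length; i++) {
            switch (extractors[i]) {
                case FLOAT:
                    value.put(metricNames[i], 1.5f); // like current.getFloatMetric(name)
                    break;
                case LONG:
                    value.put(metricNames[i], 42L);  // like current.getLongMetric(name)
                    break;
            }
        }
        System.out.println(value); // {delta=1.5, added=42}
    }
}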