use of io.druid.query.aggregation.AggregatorFactory in project hive by apache.
the class DruidGroupByQueryRecordReader method getCurrentValue.
@Override
public DruidWritable getCurrentValue() throws IOException, InterruptedException {
  // Create new value
  DruidWritable value = new DruidWritable();
  // 1) The timestamp column
  value.getValue().put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis());
  // 2) The dimension columns
  for (int i = 0; i < query.getDimensions().size(); i++) {
    DimensionSpec ds = query.getDimensions().get(i);
    List<String> dims = current.getDimension(ds.getDimension());
    if (dims.size() == 0) {
      // NULL value for dimension
      value.getValue().put(ds.getOutputName(), null);
    } else {
      int pos = dims.size() - indexes[i] - 1;
      value.getValue().put(ds.getOutputName(), dims.get(pos));
    }
  }
  int counter = 0;
  // 3) The aggregation columns
  for (AggregatorFactory af : query.getAggregatorSpecs()) {
    switch (extractors[counter++]) {
      case FLOAT:
        value.getValue().put(af.getName(), current.getFloatMetric(af.getName()));
        break;
      case LONG:
        value.getValue().put(af.getName(), current.getLongMetric(af.getName()));
        break;
    }
  }
  // 4) The post-aggregation columns
  for (PostAggregator pa : query.getPostAggregatorSpecs()) {
    assert extractors[counter++] == Extract.FLOAT;
    value.getValue().put(pa.getName(), current.getFloatMetric(pa.getName()));
  }
  return value;
}
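For context, a minimal sketch of how a caller might drain this record reader, assuming only the standard Hadoop RecordReader lifecycle. The column names "country" and "total_clicks" are hypothetical (one dimension output name and one aggregator name), and the reader instance would normally be created by Hive's Druid input format; imports for the Hive Druid handler classes are omitted since their paths vary by Hive version.
// Sketch only: drains a group-by record reader via the standard Hadoop
// RecordReader contract. "country" and "total_clicks" are hypothetical names.
static void drain(DruidGroupByQueryRecordReader reader) throws IOException, InterruptedException {
  while (reader.nextKeyValue()) {
    Map<String, Object> row = reader.getCurrentValue().getValue();
    Object timestamp = row.get(DruidTable.DEFAULT_TIMESTAMP_COLUMN); // timestamp column
    Object country = row.get("country");                             // dimension output name
    Object clicks = row.get("total_clicks");                         // aggregator name
    // ... hand the row map to the serde / object-inspector layer
  }
  reader.close();
}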
use of io.druid.query.aggregation.AggregatorFactory in project hive by apache.
the class DruidSerDe method inferSchema.
/* Timeseries query */
private void inferSchema(TimeseriesQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes) {
  // Timestamp column
  columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
  columnTypes.add(TypeInfoFactory.timestampTypeInfo);
  // Aggregator columns
  for (AggregatorFactory af : query.getAggregatorSpecs()) {
    columnNames.add(af.getName());
    columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName()));
  }
  // Post-aggregator columns (currently always inferred as float)
  for (PostAggregator pa : query.getPostAggregatorSpecs()) {
    columnNames.add(pa.getName());
    columnTypes.add(TypeInfoFactory.floatTypeInfo);
  }
}
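The aggregator column types come from DruidSerDeUtils.convertDruidToHiveType, which maps Druid metric type names onto Hive primitive types. Below is a minimal sketch of that kind of mapping, assuming the usual Druid type names (LONG, FLOAT, DOUBLE); it is illustrative only, not the actual Hive implementation.
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Sketch only: illustrates the kind of mapping convertDruidToHiveType performs;
// the real DruidSerDeUtils may handle more type names and casing rules.
static PrimitiveTypeInfo druidTypeToHive(String druidTypeName) {
  switch (druidTypeName.toUpperCase()) {
    case "LONG":
      return TypeInfoFactory.longTypeInfo;
    case "FLOAT":
      return TypeInfoFactory.floatTypeInfo;
    case "DOUBLE":
      return TypeInfoFactory.doubleTypeInfo;
    default:
      // complex/sketch metrics (e.g. hyperUnique) fall back to string
      return TypeInfoFactory.stringTypeInfo;
  }
}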
use of io.druid.query.aggregation.AggregatorFactory in project hive by apache.
the class DruidSerDe method inferSchema.
/* TopN query */
private void inferSchema(TopNQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes) {
  // Timestamp column
  columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
  columnTypes.add(TypeInfoFactory.timestampTypeInfo);
  // Dimension column
  columnNames.add(query.getDimensionSpec().getOutputName());
  columnTypes.add(TypeInfoFactory.stringTypeInfo);
  // Aggregator columns
  for (AggregatorFactory af : query.getAggregatorSpecs()) {
    columnNames.add(af.getName());
    columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName()));
  }
  // Post-aggregator columns (currently always inferred as float)
  for (PostAggregator pa : query.getPostAggregatorSpecs()) {
    columnNames.add(pa.getName());
    columnTypes.add(TypeInfoFactory.floatTypeInfo);
  }
}
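For illustration, the schema this method would infer for a hypothetical topN query over dimension "page" with a longSum aggregator "added" and a post-aggregator "added_per_row". Column order follows the insertion order above; the long mapping for "added" is assumed to come from convertDruidToHiveType.
// Sketch only: expected inference for the hypothetical topN query described above.
List<String> columnNames = new ArrayList<>();
List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);  // timestamp column
columnTypes.add(TypeInfoFactory.timestampTypeInfo);
columnNames.add("page");                                // dimension output name
columnTypes.add(TypeInfoFactory.stringTypeInfo);
columnNames.add("added");                               // longSum aggregator
columnTypes.add(TypeInfoFactory.longTypeInfo);          // assumed convertDruidToHiveType("LONG")
columnNames.add("added_per_row");                       // post-aggregator
columnTypes.add(TypeInfoFactory.floatTypeInfo);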
use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class SegmentMetadataQueryQueryToolChest method mergeAnalyses.
@VisibleForTesting
public static SegmentAnalysis mergeAnalyses(final SegmentAnalysis arg1, final SegmentAnalysis arg2, boolean lenientAggregatorMerge) {
  if (arg1 == null) {
    return arg2;
  }
  if (arg2 == null) {
    return arg1;
  }
  List<Interval> newIntervals = null;
  if (arg1.getIntervals() != null) {
    newIntervals = Lists.newArrayList();
    newIntervals.addAll(arg1.getIntervals());
  }
  if (arg2.getIntervals() != null) {
    if (newIntervals == null) {
      newIntervals = Lists.newArrayList();
    }
    newIntervals.addAll(arg2.getIntervals());
  }
  final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
  final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
  Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
  Set<String> rightColumnNames = Sets.newHashSet(rightColumns.keySet());
  for (Map.Entry<String, ColumnAnalysis> entry : leftColumns.entrySet()) {
    final String columnName = entry.getKey();
    columns.put(columnName, entry.getValue().fold(rightColumns.get(columnName)));
    rightColumnNames.remove(columnName);
  }
  for (String columnName : rightColumnNames) {
    columns.put(columnName, rightColumns.get(columnName));
  }
  final Map<String, AggregatorFactory> aggregators = Maps.newHashMap();
  if (lenientAggregatorMerge) {
    // Merge each aggregator individually, ignoring nulls
    for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
      if (analysis.getAggregators() != null) {
        for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
          final String aggregatorName = entry.getKey();
          final AggregatorFactory aggregator = entry.getValue();
          AggregatorFactory merged = aggregators.get(aggregatorName);
          if (merged != null) {
            try {
              merged = merged.getMergingFactory(aggregator);
            } catch (AggregatorFactoryNotMergeableException e) {
              merged = null;
            }
          } else {
            merged = aggregator;
          }
          aggregators.put(aggregatorName, merged);
        }
      }
    }
  } else {
    final AggregatorFactory[] aggs1 = arg1.getAggregators() != null
        ? arg1.getAggregators().values().toArray(new AggregatorFactory[arg1.getAggregators().size()])
        : null;
    final AggregatorFactory[] aggs2 = arg2.getAggregators() != null
        ? arg2.getAggregators().values().toArray(new AggregatorFactory[arg2.getAggregators().size()])
        : null;
    final AggregatorFactory[] merged = AggregatorFactory.mergeAggregators(Arrays.asList(aggs1, aggs2));
    if (merged != null) {
      for (AggregatorFactory aggregator : merged) {
        aggregators.put(aggregator.getName(), aggregator);
      }
    }
  }
  final TimestampSpec timestampSpec = TimestampSpec.mergeTimestampSpec(Lists.newArrayList(arg1.getTimestampSpec(), arg2.getTimestampSpec()));
  final Granularity queryGranularity = Granularity.mergeGranularities(Lists.newArrayList(arg1.getQueryGranularity(), arg2.getQueryGranularity()));
  final String mergedId;
  if (arg1.getId() != null && arg2.getId() != null && arg1.getId().equals(arg2.getId())) {
    mergedId = arg1.getId();
  } else {
    mergedId = "merged";
  }
  final Boolean rollup;
  if (arg1.isRollup() != null && arg2.isRollup() != null && arg1.isRollup().equals(arg2.isRollup())) {
    rollup = arg1.isRollup();
  } else {
    rollup = null;
  }
  return new SegmentAnalysis(mergedId, newIntervals, columns, arg1.getSize() + arg2.getSize(), arg1.getNumRows() + arg2.getNumRows(), aggregators.isEmpty() ? null : aggregators, timestampSpec, queryGranularity, rollup);
}
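A minimal sketch of how the two merge modes differ, using two hypothetical analyses that disagree on the type of a metric named "added". The constructor arguments follow the SegmentAnalysis signature used above, everything except the aggregator maps is left empty or null for brevity, and the expected outcomes are inferred from the merge logic shown here; import paths are assumed for this Druid version.
import com.google.common.collect.ImmutableMap;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.DoubleSumAggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.metadata.SegmentMetadataQueryQueryToolChest;
import io.druid.query.metadata.metadata.SegmentAnalysis;

// Sketch only: hypothetical segment analyses with conflicting aggregator types.
public class MergeAnalysesSketch {
  public static void main(String[] args) {
    SegmentAnalysis a1 = new SegmentAnalysis("seg1", null, ImmutableMap.of(), 0, 0,
        ImmutableMap.of("added", (AggregatorFactory) new LongSumAggregatorFactory("added", "added")),
        null, null, null);
    SegmentAnalysis a2 = new SegmentAnalysis("seg2", null, ImmutableMap.of(), 0, 0,
        ImmutableMap.of("added", (AggregatorFactory) new DoubleSumAggregatorFactory("added", "added")),
        null, null, null);
    // Strict merge: the conflicting "added" aggregators cannot be reconciled, so the
    // merged analysis ends up with no aggregators at all.
    SegmentAnalysis strict = SegmentMetadataQueryQueryToolChest.mergeAnalyses(a1, a2, false);
    // Lenient merge: the conflict is recorded as a null entry for "added" instead of
    // discarding the whole map; non-conflicting aggregators would be kept as-is.
    SegmentAnalysis lenient = SegmentMetadataQueryQueryToolChest.mergeAnalyses(a1, a2, true);
    System.out.println("strict aggregators:  " + strict.getAggregators());
    System.out.println("lenient aggregators: " + lenient.getAggregators());
  }
}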
use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class SegmentMetadataQueryRunnerFactory method createRunner.
@Override
public QueryRunner<SegmentAnalysis> createRunner(final Segment segment) {
  return new QueryRunner<SegmentAnalysis>() {
    @Override
    public Sequence<SegmentAnalysis> run(Query<SegmentAnalysis> inQ, Map<String, Object> responseContext) {
      SegmentMetadataQuery query = (SegmentMetadataQuery) inQ;
      final SegmentAnalyzer analyzer = new SegmentAnalyzer(query.getAnalysisTypes());
      final Map<String, ColumnAnalysis> analyzedColumns = analyzer.analyze(segment);
      final long numRows = analyzer.numRows(segment);
      long totalSize = 0;
      if (analyzer.analyzingSize()) {
        // Initialize with the size of the whitespace (1 byte per column per row)
        totalSize = analyzedColumns.size() * numRows;
      }
      Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
      ColumnIncluderator includerator = query.getToInclude();
      for (Map.Entry<String, ColumnAnalysis> entry : analyzedColumns.entrySet()) {
        final String columnName = entry.getKey();
        final ColumnAnalysis column = entry.getValue();
        if (!column.isError()) {
          totalSize += column.getSize();
        }
        if (includerator.include(columnName)) {
          columns.put(columnName, column);
        }
      }
      List<Interval> retIntervals = query.analyzingInterval() ? Arrays.asList(segment.getDataInterval()) : null;
      final Map<String, AggregatorFactory> aggregators;
      Metadata metadata = null;
      if (query.hasAggregators()) {
        metadata = segment.asStorageAdapter().getMetadata();
        if (metadata != null && metadata.getAggregators() != null) {
          aggregators = Maps.newHashMap();
          for (AggregatorFactory aggregator : metadata.getAggregators()) {
            aggregators.put(aggregator.getName(), aggregator);
          }
        } else {
          aggregators = null;
        }
      } else {
        aggregators = null;
      }
      final TimestampSpec timestampSpec;
      if (query.hasTimestampSpec()) {
        if (metadata == null) {
          metadata = segment.asStorageAdapter().getMetadata();
        }
        timestampSpec = metadata != null ? metadata.getTimestampSpec() : null;
      } else {
        timestampSpec = null;
      }
      final Granularity queryGranularity;
      if (query.hasQueryGranularity()) {
        if (metadata == null) {
          metadata = segment.asStorageAdapter().getMetadata();
        }
        queryGranularity = metadata != null ? metadata.getQueryGranularity() : null;
      } else {
        queryGranularity = null;
      }
      Boolean rollup = null;
      if (query.hasRollup()) {
        if (metadata == null) {
          metadata = segment.asStorageAdapter().getMetadata();
        }
        rollup = metadata != null ? metadata.isRollup() : null;
        if (rollup == null) {
          // in this case, the segment was built before the no-rollup feature existed,
          // so it was built with rollup
          rollup = Boolean.TRUE;
        }
      }
      return Sequences.simple(Arrays.asList(new SegmentAnalysis(segment.getIdentifier(), retIntervals, columns, totalSize, numRows, aggregators, timestampSpec, queryGranularity, rollup)));
    }
  };
}
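A minimal sketch of how the runner produced here might be exercised. The factory, segment, and query are assumed to come from the server machinery; imports are omitted because package paths for the Sequence helpers vary across Druid versions.
// Sketch only: drains the Sequence produced by the runner and prints a summary.
static void printAnalyses(SegmentMetadataQueryRunnerFactory factory, Segment segment, SegmentMetadataQuery query) {
  QueryRunner<SegmentAnalysis> runner = factory.createRunner(segment);
  List<SegmentAnalysis> analyses = Sequences.toList(
      runner.run(query, new HashMap<String, Object>()),
      Lists.<SegmentAnalysis>newArrayList());
  for (SegmentAnalysis analysis : analyses) {
    System.out.println(analysis.getId() + " rows=" + analysis.getNumRows());
  }
}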