Search in sources :

Example 81 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project hive by apache.

the class DruidGroupByQueryRecordReader method getCurrentValue.

@Override
public DruidWritable getCurrentValue() throws IOException, InterruptedException {
    // Create new value
    DruidWritable value = new DruidWritable();
    // 1) The timestamp column
    value.getValue().put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis());
    // 2) The dimension columns
    for (int i = 0; i < query.getDimensions().size(); i++) {
        DimensionSpec ds = query.getDimensions().get(i);
        List<String> dims = current.getDimension(ds.getDimension());
        if (dims.size() == 0) {
            // NULL value for dimension
            value.getValue().put(ds.getOutputName(), null);
        } else {
            int pos = dims.size() - indexes[i] - 1;
            value.getValue().put(ds.getOutputName(), dims.get(pos));
        }
    }
    int counter = 0;
    // 3) The aggregation columns
    for (AggregatorFactory af : query.getAggregatorSpecs()) {
        switch(extractors[counter++]) {
            case FLOAT:
                value.getValue().put(af.getName(), current.getFloatMetric(af.getName()));
                break;
            case LONG:
                value.getValue().put(af.getName(), current.getLongMetric(af.getName()));
                break;
        }
    }
    // 4) The post-aggregation columns
    for (PostAggregator pa : query.getPostAggregatorSpecs()) {
        assert extractors[counter++] == Extract.FLOAT;
        value.getValue().put(pa.getName(), current.getFloatMetric(pa.getName()));
    }
    return value;
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)

Example 82 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project hive by apache.

the class DruidSerDe method inferSchema.

/* Timeseries query */
private void inferSchema(TimeseriesQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes) {
    // Timestamp column
    columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
    columnTypes.add(TypeInfoFactory.timestampTypeInfo);
    // Aggregator columns
    for (AggregatorFactory af : query.getAggregatorSpecs()) {
        columnNames.add(af.getName());
        columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName()));
    }
    // different types for post-aggregation functions
    for (PostAggregator pa : query.getPostAggregatorSpecs()) {
        columnNames.add(pa.getName());
        columnTypes.add(TypeInfoFactory.floatTypeInfo);
    }
}
Also used : PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)

Example 83 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project hive by apache.

the class DruidSerDe method inferSchema.

/* TopN query */
private void inferSchema(TopNQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes) {
    // Timestamp column
    columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
    columnTypes.add(TypeInfoFactory.timestampTypeInfo);
    // Dimension column
    columnNames.add(query.getDimensionSpec().getOutputName());
    columnTypes.add(TypeInfoFactory.stringTypeInfo);
    // Aggregator columns
    for (AggregatorFactory af : query.getAggregatorSpecs()) {
        columnNames.add(af.getName());
        columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName()));
    }
    // different types for post-aggregation functions
    for (PostAggregator pa : query.getPostAggregatorSpecs()) {
        columnNames.add(pa.getName());
        columnTypes.add(TypeInfoFactory.floatTypeInfo);
    }
}
Also used : PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)

Example 84 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class SegmentMetadataQueryQueryToolChest method mergeAnalyses.

@VisibleForTesting
public static SegmentAnalysis mergeAnalyses(final SegmentAnalysis arg1, final SegmentAnalysis arg2, boolean lenientAggregatorMerge) {
    if (arg1 == null) {
        return arg2;
    }
    if (arg2 == null) {
        return arg1;
    }
    List<Interval> newIntervals = null;
    if (arg1.getIntervals() != null) {
        newIntervals = Lists.newArrayList();
        newIntervals.addAll(arg1.getIntervals());
    }
    if (arg2.getIntervals() != null) {
        if (newIntervals == null) {
            newIntervals = Lists.newArrayList();
        }
        newIntervals.addAll(arg2.getIntervals());
    }
    final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
    final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
    Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
    Set<String> rightColumnNames = Sets.newHashSet(rightColumns.keySet());
    for (Map.Entry<String, ColumnAnalysis> entry : leftColumns.entrySet()) {
        final String columnName = entry.getKey();
        columns.put(columnName, entry.getValue().fold(rightColumns.get(columnName)));
        rightColumnNames.remove(columnName);
    }
    for (String columnName : rightColumnNames) {
        columns.put(columnName, rightColumns.get(columnName));
    }
    final Map<String, AggregatorFactory> aggregators = Maps.newHashMap();
    if (lenientAggregatorMerge) {
        // Merge each aggregator individually, ignoring nulls
        for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
            if (analysis.getAggregators() != null) {
                for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
                    final String aggregatorName = entry.getKey();
                    final AggregatorFactory aggregator = entry.getValue();
                    AggregatorFactory merged = aggregators.get(aggregatorName);
                    if (merged != null) {
                        try {
                            merged = merged.getMergingFactory(aggregator);
                        } catch (AggregatorFactoryNotMergeableException e) {
                            merged = null;
                        }
                    } else {
                        merged = aggregator;
                    }
                    aggregators.put(aggregatorName, merged);
                }
            }
        }
    } else {
        final AggregatorFactory[] aggs1 = arg1.getAggregators() != null ? arg1.getAggregators().values().toArray(new AggregatorFactory[arg1.getAggregators().size()]) : null;
        final AggregatorFactory[] aggs2 = arg2.getAggregators() != null ? arg2.getAggregators().values().toArray(new AggregatorFactory[arg2.getAggregators().size()]) : null;
        final AggregatorFactory[] merged = AggregatorFactory.mergeAggregators(Arrays.asList(aggs1, aggs2));
        if (merged != null) {
            for (AggregatorFactory aggregator : merged) {
                aggregators.put(aggregator.getName(), aggregator);
            }
        }
    }
    final TimestampSpec timestampSpec = TimestampSpec.mergeTimestampSpec(Lists.newArrayList(arg1.getTimestampSpec(), arg2.getTimestampSpec()));
    final Granularity queryGranularity = Granularity.mergeGranularities(Lists.newArrayList(arg1.getQueryGranularity(), arg2.getQueryGranularity()));
    final String mergedId;
    if (arg1.getId() != null && arg2.getId() != null && arg1.getId().equals(arg2.getId())) {
        mergedId = arg1.getId();
    } else {
        mergedId = "merged";
    }
    final Boolean rollup;
    if (arg1.isRollup() != null && arg2.isRollup() != null && arg1.isRollup().equals(arg2.isRollup())) {
        rollup = arg1.isRollup();
    } else {
        rollup = null;
    }
    return new SegmentAnalysis(mergedId, newIntervals, columns, arg1.getSize() + arg2.getSize(), arg1.getNumRows() + arg2.getNumRows(), aggregators.isEmpty() ? null : aggregators, timestampSpec, queryGranularity, rollup);
}
Also used : AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Granularity(io.druid.java.util.common.granularity.Granularity) AggregatorFactoryNotMergeableException(io.druid.query.aggregation.AggregatorFactoryNotMergeableException) ColumnAnalysis(io.druid.query.metadata.metadata.ColumnAnalysis) TimestampSpec(io.druid.data.input.impl.TimestampSpec) SegmentAnalysis(io.druid.query.metadata.metadata.SegmentAnalysis) Map(java.util.Map) Interval(org.joda.time.Interval) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 85 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class SegmentMetadataQueryRunnerFactory method createRunner.

@Override
public QueryRunner<SegmentAnalysis> createRunner(final Segment segment) {
    return new QueryRunner<SegmentAnalysis>() {

        @Override
        public Sequence<SegmentAnalysis> run(Query<SegmentAnalysis> inQ, Map<String, Object> responseContext) {
            SegmentMetadataQuery query = (SegmentMetadataQuery) inQ;
            final SegmentAnalyzer analyzer = new SegmentAnalyzer(query.getAnalysisTypes());
            final Map<String, ColumnAnalysis> analyzedColumns = analyzer.analyze(segment);
            final long numRows = analyzer.numRows(segment);
            long totalSize = 0;
            if (analyzer.analyzingSize()) {
                // Initialize with the size of the whitespace, 1 byte per
                totalSize = analyzedColumns.size() * numRows;
            }
            Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
            ColumnIncluderator includerator = query.getToInclude();
            for (Map.Entry<String, ColumnAnalysis> entry : analyzedColumns.entrySet()) {
                final String columnName = entry.getKey();
                final ColumnAnalysis column = entry.getValue();
                if (!column.isError()) {
                    totalSize += column.getSize();
                }
                if (includerator.include(columnName)) {
                    columns.put(columnName, column);
                }
            }
            List<Interval> retIntervals = query.analyzingInterval() ? Arrays.asList(segment.getDataInterval()) : null;
            final Map<String, AggregatorFactory> aggregators;
            Metadata metadata = null;
            if (query.hasAggregators()) {
                metadata = segment.asStorageAdapter().getMetadata();
                if (metadata != null && metadata.getAggregators() != null) {
                    aggregators = Maps.newHashMap();
                    for (AggregatorFactory aggregator : metadata.getAggregators()) {
                        aggregators.put(aggregator.getName(), aggregator);
                    }
                } else {
                    aggregators = null;
                }
            } else {
                aggregators = null;
            }
            final TimestampSpec timestampSpec;
            if (query.hasTimestampSpec()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                timestampSpec = metadata != null ? metadata.getTimestampSpec() : null;
            } else {
                timestampSpec = null;
            }
            final Granularity queryGranularity;
            if (query.hasQueryGranularity()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                queryGranularity = metadata != null ? metadata.getQueryGranularity() : null;
            } else {
                queryGranularity = null;
            }
            Boolean rollup = null;
            if (query.hasRollup()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                rollup = metadata != null ? metadata.isRollup() : null;
                if (rollup == null) {
                    // in this case, this segment is built before no-rollup function is coded,
                    // thus it is built with rollup
                    rollup = Boolean.TRUE;
                }
            }
            return Sequences.simple(Arrays.asList(new SegmentAnalysis(segment.getIdentifier(), retIntervals, columns, totalSize, numRows, aggregators, timestampSpec, queryGranularity, rollup)));
        }
    };
}
Also used : BaseQuery(io.druid.query.BaseQuery) SegmentMetadataQuery(io.druid.query.metadata.metadata.SegmentMetadataQuery) Query(io.druid.query.Query) Metadata(io.druid.segment.Metadata) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Granularity(io.druid.java.util.common.granularity.Granularity) ColumnIncluderator(io.druid.query.metadata.metadata.ColumnIncluderator) QueryRunner(io.druid.query.QueryRunner) ConcatQueryRunner(io.druid.query.ConcatQueryRunner) SegmentMetadataQuery(io.druid.query.metadata.metadata.SegmentMetadataQuery) ColumnAnalysis(io.druid.query.metadata.metadata.ColumnAnalysis) TimestampSpec(io.druid.data.input.impl.TimestampSpec) SegmentAnalysis(io.druid.query.metadata.metadata.SegmentAnalysis) Map(java.util.Map) Interval(org.joda.time.Interval)

Aggregations

AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)148 Test (org.junit.Test)86 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)82 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)64 Interval (org.joda.time.Interval)45 DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory)38 DateTime (org.joda.time.DateTime)37 FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory)32 Result (io.druid.query.Result)31 DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory)27 HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory)25 Row (io.druid.data.input.Row)24 PostAggregator (io.druid.query.aggregation.PostAggregator)24 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)22 CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory)19 LongMaxAggregatorFactory (io.druid.query.aggregation.LongMaxAggregatorFactory)18 LongFirstAggregatorFactory (io.druid.query.aggregation.first.LongFirstAggregatorFactory)18 LongLastAggregatorFactory (io.druid.query.aggregation.last.LongLastAggregatorFactory)18 DimensionSpec (io.druid.query.dimension.DimensionSpec)18 TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery)17