Example 6 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

The class BaseTopNAlgorithm defines the method makeAggregators:

public static Aggregator[] makeAggregators(Cursor cursor, List<AggregatorFactory> aggregatorSpecs) {
    Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()];
    int aggregatorIndex = 0;
    for (AggregatorFactory spec : aggregatorSpecs) {
        // Each factory builds an Aggregator bound to the cursor's column selectors.
        aggregators[aggregatorIndex] = spec.factorize(cursor.getColumnSelectorFactory());
        ++aggregatorIndex;
    }
    return aggregators;
}
Also used : Aggregator(org.apache.druid.query.aggregation.Aggregator) BufferAggregator(org.apache.druid.query.aggregation.BufferAggregator) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory)
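
A minimal sketch of how the returned array might be driven, assuming a cursor and aggregator specs are already in scope (the loop itself is illustrative; only makeAggregators above is from the source):

Aggregator[] aggregators = BaseTopNAlgorithm.makeAggregators(cursor, aggregatorSpecs);
try {
    // Aggregate every row the cursor exposes; each aggregate() call reads the cursor's current row.
    while (!cursor.isDone()) {
        for (Aggregator aggregator : aggregators) {
            aggregator.aggregate();
        }
        cursor.advance();
    }
} finally {
    // Aggregators may hold resources, so close them once iteration is done.
    for (Aggregator aggregator : aggregators) {
        aggregator.close();
    }
}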

Example 7 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

The class TimeseriesBinaryFn defines the method apply:

@Override
public Result<TimeseriesResultValue> apply(Result<TimeseriesResultValue> arg1, Result<TimeseriesResultValue> arg2) {
    // A null argument means one side has no partial result; return the other side unchanged.
    if (arg1 == null) {
        return arg2;
    }
    if (arg2 == null) {
        return arg1;
    }
    TimeseriesResultValue arg1Val = arg1.getValue();
    TimeseriesResultValue arg2Val = arg2.getValue();
    Map<String, Object> retVal = new LinkedHashMap<String, Object>();
    // Merge the two partial values metric by metric, using each factory's combine().
    for (AggregatorFactory factory : aggregations) {
        final String metricName = factory.getName();
        retVal.put(metricName, factory.combine(arg1Val.getMetric(metricName), arg2Val.getMetric(metricName)));
    }
    // With "all" granularity keep the left timestamp; otherwise align it to the start of its bucket.
    return (gran instanceof AllGranularity)
           ? new Result<TimeseriesResultValue>(arg1.getTimestamp(), new TimeseriesResultValue(retVal))
           : new Result<TimeseriesResultValue>(gran.bucketStart(arg1.getTimestamp()), new TimeseriesResultValue(retVal));
}
Also used : AllGranularity(org.apache.druid.java.util.common.granularity.AllGranularity) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) LinkedHashMap(java.util.LinkedHashMap)
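
To make the merge step concrete, here is a small hedged illustration of what combine() does for one metric, using a LongSumAggregatorFactory and invented partial values:

// Partial results from two segments for the metric "rows" (the values are made up).
AggregatorFactory rowsSum = new LongSumAggregatorFactory("rows", "rows");
Object merged = rowsSum.combine(3L, 4L);
// merged is 7L: a sum factory merges partial results by adding them.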

Example 8 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

The class OnheapIncrementalIndex defines the method factorizeAggs:

/**
 * Creates aggregators for the given aggregator factories.
 *
 * @return Total initial size in bytes required by all the aggregators.
 * This value is non-zero only when {@link #useMaxMemoryEstimates} is false.
 */
private long factorizeAggs(AggregatorFactory[] metrics, Aggregator[] aggs, ThreadLocal<InputRow> rowContainer, InputRow row) {
    long totalInitialSizeBytes = 0L;
    rowContainer.set(row);
    // Size of the reference each on-heap aggregator occupies in the aggregators array.
    final long aggReferenceSize = Long.BYTES;
    for (int i = 0; i < metrics.length; i++) {
        final AggregatorFactory agg = metrics[i];
        if (useMaxMemoryEstimates) {
            // Legacy path: rely on max-size estimates, so no initial size is tracked here.
            aggs[i] = agg.factorize(selectors.get(agg.getName()));
        } else {
            // Sized path: the factory reports the aggregator's initial heap footprint.
            AggregatorAndSize aggregatorAndSize = agg.factorizeWithSize(selectors.get(agg.getName()));
            aggs[i] = aggregatorAndSize.getAggregator();
            totalInitialSizeBytes += aggregatorAndSize.getInitialSizeBytes();
            totalInitialSizeBytes += aggReferenceSize;
        }
    }
    rowContainer.set(null);
    return totalInitialSizeBytes;
}
Also used : AggregatorAndSize(org.apache.druid.query.aggregation.AggregatorAndSize) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory)
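
A short sketch contrasting the two factorization paths above, assuming a single factory and a columnSelectorFactory already in scope:

// Legacy path (useMaxMemoryEstimates == true): only the aggregator is produced.
Aggregator legacy = factory.factorize(columnSelectorFactory);

// Sized path: the factory also reports the aggregator's initial heap footprint,
// which the index folds into its running memory estimate.
AggregatorAndSize sized = factory.factorizeWithSize(columnSelectorFactory);
Aggregator aggregator = sized.getAggregator();
long initialBytes = sized.getInitialSizeBytes() + Long.BYTES; // plus the reference size, as above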

Example 9 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

The class OnheapIncrementalIndex defines the method getMaxBytesPerRowForAggregators:

/**
 * Old method of memory estimation. Used only when {@link #useMaxMemoryEstimates} is true.
 *
 * Gives the estimated max size per aggregator. It is assumed that every aggregator will have enough overhead for
 * its own object header and for a pointer to a selector, so an overhead factor of 16 additional bytes is added for
 * each object. These 16 bytes (128 bits) are the object metadata of a 64-bit JVM process and consist of:
 * <ul>
 * <li>Class pointer which describes the object type: 64 bits
 * <li>Flags which describe the state of the object, including the hashcode: 64 bits
 * </ul>
 * The total size estimation consists of:
 * <ul>
 * <li> metrics length: Integer.BYTES * len
 * <li> max aggregator intermediate size: guessed heap footprint per aggregator + overhead factor (16 bytes)
 * </ul>
 *
 * @param incrementalIndexSchema schema whose metric factories are measured
 *
 * @return long max aggregator size in bytes
 */
private static long getMaxBytesPerRowForAggregators(IncrementalIndexSchema incrementalIndexSchema) {
    // With rollup, each aggregator may accumulate many input rows, so use a larger row-count
    // assumption for the footprint guess.
    final long rowsPerAggregator = incrementalIndexSchema.isRollup() ? ROLLUP_RATIO_FOR_AGGREGATOR_FOOTPRINT_ESTIMATION : 1;
    long maxAggregatorIntermediateSize = ((long) Integer.BYTES) * incrementalIndexSchema.getMetrics().length;
    for (final AggregatorFactory aggregator : incrementalIndexSchema.getMetrics()) {
        // Guessed heap footprint plus the 16-byte per-object overhead described above.
        maxAggregatorIntermediateSize += (long) aggregator.guessAggregatorHeapFootprint(rowsPerAggregator) + 2L * Long.BYTES;
    }
    return maxAggregatorIntermediateSize;
}
Also used : AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory)
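
A worked illustration of the estimate, assuming two hypothetical metrics whose guessAggregatorHeapFootprint returns 24 bytes each (the footprint values are invented):

// metrics.length == 2
long estimate = Integer.BYTES * 2L;      // 8 bytes for the metrics-length term
estimate += 24L + 2L * Long.BYTES;       // first aggregator: guessed footprint + 16-byte overhead
estimate += 24L + 2L * Long.BYTES;       // second aggregator
// estimate == 88 bytes per row for the aggregators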

Example 10 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

The class SegmentAnalyzerTest defines the method testAnalyzingSegmentWithNonExistentAggregator:

/**
 * This test verifies that if a segment was created using an unknown or invalid aggregator
 * (which can happen if an aggregator was removed in a later version), then analyzing the
 * segment does not fail, and the analysis result for the complex column is reported as an error.
 *
 * @throws IOException
 */
@Test
public void testAnalyzingSegmentWithNonExistentAggregator() throws IOException {
    final URL resource = SegmentAnalyzerTest.class.getClassLoader().getResource("druid.sample.numeric.tsv");
    CharSource source = Resources.asByteSource(resource).asCharSource(StandardCharsets.UTF_8);
    String invalid_aggregator = "invalid_aggregator";
    AggregatorFactory[] metrics = new AggregatorFactory[] {
        new DoubleSumAggregatorFactory(TestIndex.DOUBLE_METRICS[0], "index"),
        new HyperUniquesAggregatorFactory("quality_uniques", "quality"),
        new InvalidAggregatorFactory(invalid_aggregator, "quality")
    };
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
        .withTimestampSpec(new TimestampSpec("ds", "auto", null))
        .withDimensionsSpec(TestIndex.DIMENSIONS_SPEC)
        .withMetrics(metrics)
        .withRollup(true)
        .build();
    final IncrementalIndex retVal = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setMaxRowCount(10000)
        .build();
    IncrementalIndex incrementalIndex = TestIndex.loadIncrementalIndex(retVal, source);
    // Analyze the in-memory segment.
    {
        SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
        IncrementalIndexSegment segment = new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("ds"));
        Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
        ColumnAnalysis columnAnalysis = analyses.get(invalid_aggregator);
        Assert.assertFalse(columnAnalysis.isError());
        Assert.assertEquals("invalid_complex_column_type", columnAnalysis.getType());
        Assert.assertEquals(ColumnType.ofComplex("invalid_complex_column_type"), columnAnalysis.getTypeSignature());
    }
    // Persist the index.
    final File segmentFile = TestIndex.INDEX_MERGER.persist(incrementalIndex, temporaryFolder.newFolder(), TestIndex.INDEX_SPEC, null);
    // Unload the complex serde, then analyze the persisted segment.
    ComplexMetrics.unregisterSerde(InvalidAggregatorFactory.TYPE);
    {
        SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
        QueryableIndexSegment segment = new QueryableIndexSegment(TestIndex.INDEX_IO.loadIndex(segmentFile), SegmentId.dummy("ds"));
        Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
        ColumnAnalysis invalidColumnAnalysis = analyses.get(invalid_aggregator);
        Assert.assertTrue(invalidColumnAnalysis.isError());
        Assert.assertEquals("error:unknown_complex_invalid_complex_column_type", invalidColumnAnalysis.getErrorMessage());
        // Run a segment metadata query also to verify it doesn't break
        final List<SegmentAnalysis> results = getSegmentAnalysises(segment, EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
        for (SegmentAnalysis result : results) {
            Assert.assertTrue(result.getColumns().get(invalid_aggregator).isError());
        }
    }
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) CharSource(com.google.common.io.CharSource) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) ColumnBuilder(org.apache.druid.segment.column.ColumnBuilder) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) URL(java.net.URL) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) List(java.util.List) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) Map(java.util.Map) File(java.io.File) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
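
Beyond the assertions above, a caller might surface such error columns generically after analysis; a minimal sketch using only the ColumnAnalysis accessors exercised in the test:

Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
for (Map.Entry<String, ColumnAnalysis> entry : analyses.entrySet()) {
    if (entry.getValue().isError()) {
        // e.g. "error:unknown_complex_invalid_complex_column_type" for the unknown aggregator
        System.err.println(entry.getKey() + ": " + entry.getValue().getErrorMessage());
    }
}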

Aggregations

AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 163
Test (org.junit.Test): 85
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 56
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 48
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 42
ArrayList (java.util.ArrayList): 33
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 33
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 28
DataSchema (org.apache.druid.segment.indexing.DataSchema): 25
Nullable (javax.annotation.Nullable): 22
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 22
PostAggregator (org.apache.druid.query.aggregation.PostAggregator): 22
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 22
HashMap (java.util.HashMap): 20
List (java.util.List): 20
DoubleMaxAggregatorFactory (org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory): 18
LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory): 18
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 18
Map (java.util.Map): 17
FloatFirstAggregatorFactory (org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory): 15