Example 76 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From the class KafkaIndexTaskTest, method testKafkaInputFormat.

@Test(timeout = 60_000L)
public void testKafkaInputFormat() throws Exception {
    // Insert data
    insertData(Iterables.limit(records, 3));
    final KafkaIndexTask task = createTask(null, new DataSchema("test_ds", new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(Arrays.asList(new StringDimensionSchema("dim1"), new StringDimensionSchema("dim1t"), new StringDimensionSchema("dim2"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat"), new StringDimensionSchema("kafka.testheader.encoding"))), new AggregatorFactory[] { new DoubleSumAggregatorFactory("met1sum", "met1"), new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), null), new KafkaIndexTaskIOConfig(0, "sequence0", new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)), kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, null, KAFKA_INPUT_FORMAT));
    Assert.assertTrue(task.supportsQueries());
    final ListenableFuture<TaskStatus> future = runTask(task);
    while (countEvents(task) != 3) {
        Thread.sleep(25);
    }
    Assert.assertEquals(Status.READING, task.getRunner().getStatus());
    final QuerySegmentSpec interval = OBJECT_MAPPER.readValue("\"2008/2012\"", QuerySegmentSpec.class);
    List<ScanResultValue> scanResultValues = scanData(task, interval);
    // verify that all three inserted records were indexed and are visible to queries
    Assert.assertEquals(3, Iterables.size(scanResultValues));
    int i = 0;
    for (ScanResultValue result : scanResultValues) {
        final Map<String, Object> event = ((List<Map<String, Object>>) result.getEvents()).get(0);
        Assert.assertEquals("application/json", event.get("kafka.testheader.encoding"));
        Assert.assertEquals("y", event.get("dim2"));
    }
    // insert remaining data
    insertData(Iterables.skip(records, 3));
    // Wait for task to exit
    Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
    // Check metrics
    Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
}
Also used : DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TaskStatus(org.apache.druid.indexer.TaskStatus) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest)
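
The core AggregatorFactory usage in this test is the metrics array handed to the DataSchema. A minimal sketch of just that piece, reusing the column and metric names from the call above:

import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;

// Metrics for a DataSchema are declared as an array of AggregatorFactory implementations.
AggregatorFactory[] metrics = new AggregatorFactory[] {
    // sums the incoming "met1" column into a "met1sum" metric at ingestion time
    new DoubleSumAggregatorFactory("met1sum", "met1"),
    // counts how many input rows rolled up into each persisted row
    new CountAggregatorFactory("rows")
};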

Example 77 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From the class QuantileSqlAggregator, method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
    final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0)));
    if (input == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    final String histogramName = StringUtils.format("%s:agg", name);
    final RexNode probabilityArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
    if (!probabilityArg.isA(SqlKind.LITERAL)) {
        // Probability must be a literal in order to plan.
        return null;
    }
    final float probability = ((Number) RexLiteral.value(probabilityArg)).floatValue();
    final int resolution;
    if (aggregateCall.getArgList().size() >= 3) {
        final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
        if (!resolutionArg.isA(SqlKind.LITERAL)) {
            // Resolution must be a literal in order to plan.
            return null;
        }
        resolution = ((Number) RexLiteral.value(resolutionArg)).intValue();
    } else {
        resolution = ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE;
    }
    final int numBuckets = ApproximateHistogram.DEFAULT_BUCKET_SIZE;
    final float lowerLimit = Float.NEGATIVE_INFINITY;
    final float upperLimit = Float.POSITIVE_INFINITY;
    // Look for existing matching aggregatorFactory.
    for (final Aggregation existing : existingAggregations) {
        for (AggregatorFactory factory : existing.getAggregatorFactories()) {
            if (factory instanceof ApproximateHistogramAggregatorFactory) {
                final ApproximateHistogramAggregatorFactory theFactory = (ApproximateHistogramAggregatorFactory) factory;
                // Check input for equivalence.
                final boolean inputMatches;
                final DruidExpression virtualInput = virtualColumnRegistry.findVirtualColumnExpressions(theFactory.requiredFields()).stream().findFirst().orElse(null);
                if (virtualInput == null) {
                    inputMatches = input.isDirectColumnAccess() && input.getDirectColumn().equals(theFactory.getFieldName());
                } else {
                    inputMatches = virtualInput.equals(input);
                }
                final boolean matches = inputMatches && theFactory.getResolution() == resolution && theFactory.getNumBuckets() == numBuckets && theFactory.getLowerLimit() == lowerLimit && theFactory.getUpperLimit() == upperLimit;
                if (matches) {
                    // Found existing one. Use this.
                    return Aggregation.create(ImmutableList.of(), new QuantilePostAggregator(name, factory.getName(), probability));
                }
            }
        }
    }
    // No existing match found. Create a new one.
    if (input.isDirectColumnAccess()) {
        if (rowSignature.getColumnType(input.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
            aggregatorFactory = new ApproximateHistogramFoldingAggregatorFactory(histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false);
        } else {
            aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, input.getDirectColumn(), resolution, numBuckets, lowerLimit, upperLimit, false);
        }
    } else {
        final String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, ColumnType.FLOAT);
        aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, virtualColumnName, resolution, numBuckets, lowerLimit, upperLimit, false);
    }
    return Aggregation.create(ImmutableList.of(aggregatorFactory), new QuantilePostAggregator(name, histogramName, probability));
}
Also used : Project(org.apache.calcite.rel.core.Project) SqlAggregator(org.apache.druid.sql.calcite.aggregation.SqlAggregator) ReturnTypes(org.apache.calcite.sql.type.ReturnTypes) QuantilePostAggregator(org.apache.druid.query.aggregation.histogram.QuantilePostAggregator) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) ApproximateHistogram(org.apache.druid.query.aggregation.histogram.ApproximateHistogram) ImmutableList(com.google.common.collect.ImmutableList) RexNode(org.apache.calcite.rex.RexNode) VirtualColumnRegistry(org.apache.druid.sql.calcite.rel.VirtualColumnRegistry) PlannerContext(org.apache.druid.sql.calcite.planner.PlannerContext) Nullable(javax.annotation.Nullable) SqlKind(org.apache.calcite.sql.SqlKind) SqlTypeFamily(org.apache.calcite.sql.type.SqlTypeFamily) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) RexBuilder(org.apache.calcite.rex.RexBuilder) RexLiteral(org.apache.calcite.rex.RexLiteral) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SqlFunctionCategory(org.apache.calcite.sql.SqlFunctionCategory) ApproximateHistogramFoldingAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) ValueType(org.apache.druid.segment.column.ValueType) Aggregation(org.apache.druid.sql.calcite.aggregation.Aggregation) ApproximateHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) List(java.util.List) Aggregations(org.apache.druid.sql.calcite.aggregation.Aggregations) RowSignature(org.apache.druid.segment.column.RowSignature) OperandTypes(org.apache.calcite.sql.type.OperandTypes) ColumnType(org.apache.druid.segment.column.ColumnType) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) Expressions(org.apache.druid.sql.calcite.expression.Expressions)
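
Stripped of the planner plumbing, the method pairs an ApproximateHistogramAggregatorFactory with a QuantilePostAggregator and returns them wrapped in an Aggregation. A minimal sketch of that final step, with an illustrative input column "m1" and probability 0.95 (the remaining arguments mirror the defaults used above):

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.histogram.ApproximateHistogram;
import org.apache.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory;
import org.apache.druid.query.aggregation.histogram.QuantilePostAggregator;
import org.apache.druid.sql.calcite.aggregation.Aggregation;

// Build the histogram aggregator that accumulates the raw column ...
AggregatorFactory factory = new ApproximateHistogramAggregatorFactory(
    "quantile:agg",                               // histogramName, "<name>:agg" as above
    "m1",                                         // illustrative input column
    ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE,  // resolution
    ApproximateHistogram.DEFAULT_BUCKET_SIZE,     // numBuckets
    Float.NEGATIVE_INFINITY,                      // lowerLimit
    Float.POSITIVE_INFINITY,                      // upperLimit
    false);
// ... and the post-aggregator that extracts the requested quantile from it.
Aggregation aggregation = Aggregation.create(
    ImmutableList.of(factory),
    new QuantilePostAggregator("quantile", "quantile:agg", 0.95f));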

Example 78 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project hive by apache.

From the class TestDruidRecordWriter, method testWrite.

// Test is failing due to a Guava dependency conflict; Druid 0.13.0 should have less dependency on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
    });
    DataSchema dataSchema = new DataSchema(dataSourceName, parserMap, new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum"), new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)), null, objectMapper);
    IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, null, null, null, temporaryFolder.newFolder(), null, null, null, null, indexSpec, null, null, 0, 0, null, null, 0L, null, null);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    });
    Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
    List<DruidWritable> druidWritables = expectedRows.stream().map(input -> new DruidWritable(ImmutableMap.<String, Object>builder().putAll(input).put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Granularities.DAY.bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis()).build())).collect(Collectors.toList());
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), null, ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) TypeReference(com.fasterxml.jackson.core.type.TypeReference) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) InputRow(org.apache.druid.data.input.InputRow) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) DruidTable(org.apache.calcite.adapter.druid.DruidTable) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DruidStorageHandlerUtils(org.apache.hadoop.hive.druid.DruidStorageHandlerUtils) Constants(org.apache.hadoop.hive.conf.Constants) DruidStorageHandler(org.apache.hadoop.hive.druid.DruidStorageHandler) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) Ignore(org.junit.Ignore) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) TemporaryFolder(org.junit.rules.TemporaryFolder)
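
Here the AggregatorFactory[] describes how the record writer rolls up rows: a long sum over "visited_sum" and a HyperLogLog fold over "unique_hosts". A minimal sketch of just that array, using the same names as the test:

import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;

AggregatorFactory[] metrics = new AggregatorFactory[] {
    // sums the pre-aggregated "visited_sum" column into a metric of the same name
    new LongSumAggregatorFactory("visited_sum", "visited_sum"),
    // folds pre-built HyperLogLog sketches from the "unique_hosts" column
    new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
};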

Example 79 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From the class IndexedTableJoinCursorBenchmark, method makeQueryableIndexSegment.

public static QueryableIndexSegment makeQueryableIndexSegment(Closer closer, String dataSource, int rowsPerSegment) {
    final List<GeneratorColumnSchema> schemaColumnsInfo = ImmutableList.of(GeneratorColumnSchema.makeSequential("stringKey", ValueType.STRING, false, 1, null, 0, rowsPerSegment), GeneratorColumnSchema.makeSequential("longKey", ValueType.LONG, false, 1, null, 0, rowsPerSegment), GeneratorColumnSchema.makeLazyZipf("string1", ValueType.STRING, false, 1, 0.1, 0, rowsPerSegment, 2.0), GeneratorColumnSchema.makeLazyZipf("string2", ValueType.STRING, false, 1, 0.3, 0, 1000000, 1.5), GeneratorColumnSchema.makeLazyZipf("string3", ValueType.STRING, false, 1, 0.12, 0, 1000, 1.25), GeneratorColumnSchema.makeLazyZipf("string4", ValueType.STRING, false, 1, 0.22, 0, 12000, 3.0), GeneratorColumnSchema.makeLazyZipf("string5", ValueType.STRING, false, 1, 0.05, 0, 33333, 1.8), GeneratorColumnSchema.makeLazyZipf("long1", ValueType.LONG, false, 1, 0.1, 0, 1001, 2.0), GeneratorColumnSchema.makeLazyZipf("long2", ValueType.LONG, false, 1, 0.01, 0, 666666, 2.2), GeneratorColumnSchema.makeLazyZipf("long3", ValueType.LONG, false, 1, 0.12, 0, 1000000, 2.5), GeneratorColumnSchema.makeLazyZipf("long4", ValueType.LONG, false, 1, 0.4, 0, 23, 1.2), GeneratorColumnSchema.makeLazyZipf("long5", ValueType.LONG, false, 1, 0.33, 0, 9999, 1.5), GeneratorColumnSchema.makeLazyZipf("double1", ValueType.DOUBLE, false, 1, 0.1, 0, 333, 2.2), GeneratorColumnSchema.makeLazyZipf("double2", ValueType.DOUBLE, false, 1, 0.01, 0, 4021, 2.5), GeneratorColumnSchema.makeLazyZipf("double3", ValueType.DOUBLE, false, 1, 0.41, 0, 90210, 4.0), GeneratorColumnSchema.makeLazyZipf("double4", ValueType.DOUBLE, false, 1, 0.5, 0, 5555555, 1.2), GeneratorColumnSchema.makeLazyZipf("double5", ValueType.DOUBLE, false, 1, 0.23, 0, 80, 1.8), GeneratorColumnSchema.makeLazyZipf("float1", ValueType.FLOAT, false, 1, 0.11, 0, 1000000, 1.7), GeneratorColumnSchema.makeLazyZipf("float2", ValueType.FLOAT, false, 1, 0.4, 0, 10, 1.5), GeneratorColumnSchema.makeLazyZipf("float3", ValueType.FLOAT, false, 1, 0.8, 0, 5000, 2.3), GeneratorColumnSchema.makeLazyZipf("float4", ValueType.FLOAT, false, 1, 0.999, 0, 14440, 2.0), GeneratorColumnSchema.makeLazyZipf("float5", ValueType.FLOAT, false, 1, 0.001, 0, 1029, 1.5));
    final List<AggregatorFactory> aggs = new ArrayList<>();
    aggs.add(new CountAggregatorFactory("rows"));
    final Interval interval = Intervals.of("2000-01-01/P1D");
    final GeneratorSchemaInfo schema = new GeneratorSchemaInfo(schemaColumnsInfo, aggs, interval, false);
    final DataSegment dataSegment = DataSegment.builder().dataSource(dataSource).interval(schema.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build();
    final QueryableIndex index = closer.register(new SegmentGenerator()).generate(dataSegment, schema, Granularities.NONE, rowsPerSegment);
    return closer.register(new QueryableIndexSegment(index, SegmentId.dummy(dataSource)));
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) ArrayList(java.util.ArrayList) GeneratorColumnSchema(org.apache.druid.segment.generator.GeneratorColumnSchema) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DataSegment(org.apache.druid.timeline.DataSegment) SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) QueryableIndex(org.apache.druid.segment.QueryableIndex) Interval(org.joda.time.Interval)
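
The benchmark only needs a row count per generated segment, so the aggregator list it feeds into GeneratorSchemaInfo holds a single CountAggregatorFactory. A minimal sketch of that piece (the column schemas and interval are elided; see the full call above):

import java.util.ArrayList;
import java.util.List;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;

// One aggregator is enough for the data generator: a row count named "rows".
List<AggregatorFactory> aggs = new ArrayList<>();
aggs.add(new CountAggregatorFactory("rows"));
// aggs is then passed to new GeneratorSchemaInfo(schemaColumnsInfo, aggs, interval, false)
// together with the column schemas and interval shown above.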

Example 80 with AggregatorFactory

Use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From the class FilteredAggregatorBenchmark, method setup.

/**
 * Set up everything common for benchmarking both the incremental index and the queryable index.
 */
@Setup
public void setup() {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schema);
    generator = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED, schemaInfo.getDataInterval(), rowsPerSegment);
    filter = new OrDimFilter(Arrays.asList(new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Collections.singletonList("X"), null)));
    filteredMetric = new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter);
    factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = Collections.singletonList(filteredMetric);
    query = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ContainsSearchQuerySpec(org.apache.druid.query.search.ContainsSearchQuerySpec) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) DataGenerator(org.apache.druid.segment.generator.DataGenerator) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) Setup(org.openjdk.jmh.annotations.Setup)
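
The benchmark's key construct is FilteredAggregatorFactory, which wraps another AggregatorFactory so that only rows matching a DimFilter are aggregated. A minimal sketch using a single RegexDimFilter in place of the OR of four filters built above:

import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.FilteredAggregatorFactory;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.RegexDimFilter;

// Only rows whose "dimSequential" value matches the regex contribute to the count.
DimFilter filter = new RegexDimFilter("dimSequential", "X", null);
AggregatorFactory filteredMetric =
    new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter);
// filteredMetric can then be handed to a query builder via .aggregators(...), as in setup().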

Aggregations

AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 163
Test (org.junit.Test): 85
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 56
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 48
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 42
ArrayList (java.util.ArrayList): 33
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 33
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 28
DataSchema (org.apache.druid.segment.indexing.DataSchema): 25
Nullable (javax.annotation.Nullable): 22
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 22
PostAggregator (org.apache.druid.query.aggregation.PostAggregator): 22
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 22
HashMap (java.util.HashMap): 20
List (java.util.List): 20
DoubleMaxAggregatorFactory (org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory): 18
LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory): 18
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 18
Map (java.util.Map): 17
FloatFirstAggregatorFactory (org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory): 15