Example 16 with HyperUniquesAggregatorFactory

Use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io, from the class GroupByQueryRunnerTest, method testSubqueryWithHyperUniques.

@Test
public void testSubqueryWithHyperUniques() {
    GroupByQuery subquery = GroupByQuery.builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
        .setAggregatorSpecs(Arrays.asList(
            QueryRunnerTestHelper.rowsCount,
            new LongSumAggregatorFactory("idx", "index"),
            new HyperUniquesAggregatorFactory("quality_uniques", "quality_uniques")))
        .setGranularity(QueryRunnerTestHelper.dayGran)
        .build();
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource(subquery)
        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("alias", "alias")))
        .setAggregatorSpecs(Arrays.asList(
            new LongSumAggregatorFactory("rows", "rows"),
            new LongSumAggregatorFactory("idx", "idx"),
            new HyperUniquesAggregatorFactory("uniq", "quality_uniques")))
        .setGranularity(QueryRunnerTestHelper.allGran)
        .build();
    List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 2L, "idx", 282L, "uniq", 1.0002442201269182),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 2L, "idx", 230L, "uniq", 1.0002442201269182),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 2L, "idx", 324L, "uniq", 1.0002442201269182),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 2L, "idx", 233L, "uniq", 1.0002442201269182),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 5317L, "uniq", 1.0002442201269182),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 2L, "idx", 235L, "uniq", 1.0002442201269182),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 5405L, "uniq", 1.0002442201269182),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 2L, "idx", 175L, "uniq", 1.0002442201269182),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 2L, "idx", 245L, "uniq", 1.0002442201269182));
    // Subqueries are handled by the ToolChest
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "");
}
Also used: DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) RegexFilteredDimensionSpec(io.druid.query.dimension.RegexFilteredDimensionSpec) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) ListFilteredDimensionSpec(io.druid.query.dimension.ListFilteredDimensionSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) Row(io.druid.data.input.Row) Test(org.junit.Test)
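
The outer query's factory reads the subquery's pre-aggregated sketch column rather than a raw dimension, which is why its fieldName is "quality_uniques". Below is a minimal, hypothetical sketch of that combining step; it assumes the (name, fieldName) constructor seen above, that HyperLogLogCollector lives in io.druid.hll in this Druid version, and that finalizeComputation returns the cardinality estimate as a Double:

import io.druid.hll.HyperLogLogCollector;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;

public class HyperUniquesFinalizeSketch {
    public static void main(String[] args) {
        // "uniq" is the output name; "quality_uniques" must already hold
        // HyperLogLogCollector sketches (here, built by the subquery).
        AggregatorFactory outer = new HyperUniquesAggregatorFactory("uniq", "quality_uniques");
        // finalizeComputation converts a merged collector into the double
        // estimates seen in expectedResults; an empty collector estimates 0.0.
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        System.out.println(outer.finalizeComputation(collector));
    }
}

The non-integer expected values (1.0002442201269182 for a single distinct value) are a property of the HyperLogLog estimator, not a bug in the test.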

Example 17 with HyperUniquesAggregatorFactory

Use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io, from the class HadoopConverterJobTest, method setUp.

@Before
public void setUp() throws Exception {
    final MetadataStorageUpdaterJobSpec metadataStorageUpdaterJobSpec = new MetadataStorageUpdaterJobSpec() {

        @Override
        public String getSegmentTable() {
            return derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable();
        }

        @Override
        public MetadataStorageConnectorConfig get() {
            return derbyConnectorRule.getMetadataConnectorConfig();
        }
    };
    final File scratchFileDir = temporaryFolder.newFolder();
    storageLocProperty = System.getProperty(STORAGE_PROPERTY_KEY);
    tmpSegmentDir = temporaryFolder.newFolder();
    System.setProperty(STORAGE_PROPERTY_KEY, tmpSegmentDir.getAbsolutePath());
    final URL url = Preconditions.checkNotNull(Query.class.getClassLoader().getResource("druid.sample.tsv"));
    final File tmpInputFile = temporaryFolder.newFile();
    FileUtils.retryCopy(new ByteSource() {

        @Override
        public InputStream openStream() throws IOException {
            return url.openStream();
        }
    }, tmpInputFile, FileUtils.IS_EXCEPTION, 3);
    final HadoopDruidIndexerConfig hadoopDruidIndexerConfig = new HadoopDruidIndexerConfig(
        new HadoopIngestionSpec(
            new DataSchema(
                DATASOURCE,
                HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
                    new StringInputRowParser(
                        new DelimitedParseSpec(
                            new TimestampSpec("ts", "iso", null),
                            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList(TestIndex.DIMENSIONS)), null, null),
                            "\t",
                            "",
                            Arrays.asList(TestIndex.COLUMNS)
                        ),
                        null
                    ),
                    Map.class
                ),
                new AggregatorFactory[] {
                    new DoubleSumAggregatorFactory(TestIndex.METRICS[0], TestIndex.METRICS[0]),
                    new HyperUniquesAggregatorFactory("quality_uniques", "quality")
                },
                new UniformGranularitySpec(Granularities.MONTH, Granularities.DAY, ImmutableList.<Interval>of(interval)),
                HadoopDruidIndexerConfig.JSON_MAPPER
            ),
            new HadoopIOConfig(
                ImmutableMap.<String, Object>of("type", "static", "paths", tmpInputFile.getAbsolutePath()),
                metadataStorageUpdaterJobSpec,
                tmpSegmentDir.getAbsolutePath()
            ),
            new HadoopTuningConfig(scratchFileDir.getAbsolutePath(), null, null, null, null, null, false, false, false, false, null, false, false, null, null, null, false, false)
        )
    );
    metadataStorageTablesConfigSupplier = derbyConnectorRule.metadataTablesConfigSupplier();
    connector = derbyConnectorRule.getConnector();
    try {
        connector.getDBI().withHandle(new HandleCallback<Void>() {

            @Override
            public Void withHandle(Handle handle) throws Exception {
                handle.execute("DROP TABLE druid_segments");
                return null;
            }
        });
    } catch (CallbackFailedException e) {
    // Ignored: the segments table may not exist yet.
    }
    List<Jobby> jobs = ImmutableList.of(new Jobby() {

        @Override
        public boolean run() {
            connector.createSegmentTable(metadataStorageUpdaterJobSpec.getSegmentTable());
            return true;
        }
    }, new HadoopDruidDetermineConfigurationJob(hadoopDruidIndexerConfig), new HadoopDruidIndexerJob(hadoopDruidIndexerConfig, new SQLMetadataStorageUpdaterJobHandler(connector)));
    JobHelper.runJobs(jobs, hadoopDruidIndexerConfig);
}
Also used: HadoopIngestionSpec(io.druid.indexer.HadoopIngestionSpec) HadoopTuningConfig(io.druid.indexer.HadoopTuningConfig) URL(java.net.URL) HadoopIOConfig(io.druid.indexer.HadoopIOConfig) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) TimestampSpec(io.druid.data.input.impl.TimestampSpec) SQLMetadataStorageUpdaterJobHandler(io.druid.indexer.SQLMetadataStorageUpdaterJobHandler) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) InputStream(java.io.InputStream) DelimitedParseSpec(io.druid.data.input.impl.DelimitedParseSpec) IOException(java.io.IOException) HadoopDruidIndexerConfig(io.druid.indexer.HadoopDruidIndexerConfig) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) Handle(org.skife.jdbi.v2.Handle) DataSchema(io.druid.segment.indexing.DataSchema) Jobby(io.druid.indexer.Jobby) HadoopDruidIndexerJob(io.druid.indexer.HadoopDruidIndexerJob) StringInputRowParser(io.druid.data.input.impl.StringInputRowParser) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) ByteSource(com.google.common.io.ByteSource) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HadoopDruidDetermineConfigurationJob(io.druid.indexer.HadoopDruidDetermineConfigurationJob) Interval(org.joda.time.Interval) Before(org.junit.Before)
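
For HyperUniquesAggregatorFactory the relevant part of this setup is the metrics array passed to DataSchema: at indexing time fieldName points at the raw string column "quality", and each segment stores a complex "quality_uniques" metric holding HyperLogLog sketches. A stripped-down, hypothetical sketch of just that portion (the surrounding Hadoop wiring omitted; the "index" column name stands in for TestIndex.METRICS[0] and is illustrative):

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.DoubleSumAggregatorFactory;
import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;

public class IngestionMetricsSketch {
    public static void main(String[] args) {
        AggregatorFactory[] metrics = new AggregatorFactory[] {
            // "index" is a hypothetical double-sum column for this sketch.
            new DoubleSumAggregatorFactory("index", "index"),
            // Raw "quality" strings are hashed into per-row sketches at ingest.
            new HyperUniquesAggregatorFactory("quality_uniques", "quality")
        };
        for (AggregatorFactory metric : metrics) {
            System.out.println(metric.getName() + " <- " + metric.requiredFields());
        }
    }
}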

Example 18 with HyperUniquesAggregatorFactory

Use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io, from the class InputRowSerdeTest, method testSerde.

@Test
public void testSerde() {
    InputRow in = new MapBasedInputRow(timestamp, dims, event);
    AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] { new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"), new DoubleSumAggregatorFactory("m1out", "m1"), new LongSumAggregatorFactory("m2out", "m2"), new HyperUniquesAggregatorFactory("m3out", "m3"), // Unparseable from String to Long
    new LongSumAggregatorFactory("unparseable", "m3") };
    // Ignore Unparseable aggregator
    byte[] data = InputRowSerde.toBytes(in, aggregatorFactories, false);
    InputRow out = InputRowSerde.fromBytes(data, aggregatorFactories);
    Assert.assertEquals(timestamp, out.getTimestampFromEpoch());
    Assert.assertEquals(dims, out.getDimensions());
    Assert.assertEquals(Collections.EMPTY_LIST, out.getDimension("dim_non_existing"));
    Assert.assertEquals(ImmutableList.of("d1v"), out.getDimension("d1"));
    Assert.assertEquals(ImmutableList.of("d2v1", "d2v2"), out.getDimension("d2"));
    Assert.assertEquals(0.0f, out.getFloatMetric("agg_non_existing"), 0.00001);
    Assert.assertEquals(5.0f, out.getFloatMetric("m1out"), 0.00001);
    Assert.assertEquals(100L, out.getLongMetric("m2out"));
    Assert.assertEquals(1, ((HyperLogLogCollector) out.getRaw("m3out")).estimateCardinality(), 0.001);
    Assert.assertEquals(0L, out.getLongMetric("unparseable"));
}
Also used: DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Test(org.junit.Test)
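
The assertion on "m3out" works because the serde round-trips the metric as a HyperLogLogCollector, whose estimateCardinality() is approximate (hence the 0.001 tolerance). A small, hypothetical sketch of the collector on its own; the io.druid.hll package location and the murmur3_128 hash are assumptions about this Druid version:

import com.google.common.base.Charsets;
import com.google.common.hash.Hashing;
import io.druid.hll.HyperLogLogCollector;

public class CollectorSketch {
    public static void main(String[] args) {
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        // add() expects pre-hashed bytes; one distinct value estimates to ~1.0.
        collector.add(Hashing.murmur3_128().hashString("hyper_test", Charsets.UTF_8).asBytes());
        System.out.println(collector.estimateCardinality());
    }
}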

Example 19 with HyperUniquesAggregatorFactory

Use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io, from the class DruidSchemaTest, method setUp.

@Before
public void setUp() throws Exception {
    Calcites.setSystemProperties();
    final File tmpDir = temporaryFolder.newFolder();
    final QueryableIndex index1 = IndexBuilder.create()
        .tmpDir(new File(tmpDir, "1"))
        .indexMerger(TestHelper.getTestIndexMergerV9())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(new AggregatorFactory[] {
                    new CountAggregatorFactory("cnt"),
                    new DoubleSumAggregatorFactory("m1", "m1"),
                    new HyperUniquesAggregatorFactory("unique_dim1", "dim1")
                })
                .withRollup(false)
                .build()
        )
        .rows(ROWS1)
        .buildMMappedIndex();
    final QueryableIndex index2 = IndexBuilder.create()
        .tmpDir(new File(tmpDir, "2"))
        .indexMerger(TestHelper.getTestIndexMergerV9())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(new AggregatorFactory[] { new LongSumAggregatorFactory("m1", "m1") })
                .withRollup(false)
                .build()
        )
        .rows(ROWS2)
        .buildMMappedIndex();
    walker = new SpecificSegmentsQuerySegmentWalker(CalciteTests.queryRunnerFactoryConglomerate())
        .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(new Interval("2000/P1Y")).version("1").shardSpec(new LinearShardSpec(0)).build(), index1)
        .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(new Interval("2001/P1Y")).version("1").shardSpec(new LinearShardSpec(0)).build(), index2)
        .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE2).interval(index2.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).build(), index2);
    schema = new DruidSchema(walker, new TestServerInventoryView(walker.getSegments()), PLANNER_CONFIG_DEFAULT);
    schema.start();
    schema.awaitInitialization();
}
Also used: DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) TestServerInventoryView(io.druid.sql.calcite.util.TestServerInventoryView) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) IndexBuilder(io.druid.segment.IndexBuilder) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) SpecificSegmentsQuerySegmentWalker(io.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) QueryableIndex(io.druid.segment.QueryableIndex) File(java.io.File) Interval(org.joda.time.Interval) Before(org.junit.Before)
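
Note that only index1 carries the hyperUnique column "unique_dim1", so the merged DruidSchema has to treat it as a complex, mergeable metric. A hedged sketch of the combining behavior that makes this possible; getCombiningFactory() is a standard AggregatorFactory method, though the exact field wiring of the returned factory is an assumption here:

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;

public class CombiningFactorySketch {
    public static void main(String[] args) {
        AggregatorFactory ingest = new HyperUniquesAggregatorFactory("unique_dim1", "dim1");
        // The combining factory reads the column the original factory produced,
        // so downstream merges consume sketches rather than raw dim1 strings.
        AggregatorFactory combine = ingest.getCombiningFactory();
        System.out.println(combine.getName() + " <- " + combine.requiredFields());
    }
}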

Aggregations

HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 19 uses
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 16 uses
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 11 uses
Test (org.junit.Test): 9 uses
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 8 uses
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 6 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 4 uses
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 4 uses
TimestampSpec (io.druid.data.input.impl.TimestampSpec): 4 uses
DoubleMinAggregatorFactory (io.druid.query.aggregation.DoubleMinAggregatorFactory): 4 uses
LongMaxAggregatorFactory (io.druid.query.aggregation.LongMaxAggregatorFactory): 4 uses
ArrayList (java.util.ArrayList): 4 uses
Interval (org.joda.time.Interval): 4 uses
BenchmarkSchemaInfo (io.druid.benchmark.datagen.BenchmarkSchemaInfo): 3 uses
InputRow (io.druid.data.input.InputRow): 3 uses
Row (io.druid.data.input.Row): 3 uses
StringInputRowParser (io.druid.data.input.impl.StringInputRowParser): 3 uses
FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory): 3 uses
CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory): 3 uses
HyperUniqueFinalizingPostAggregator (io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator): 3 uses