Example 31 with PeriodGranularity

Use of org.apache.druid.java.util.common.granularity.PeriodGranularity in project druid by druid-io.

From class CompactionTaskTest, method testQueryGranularityAndSegmentGranularityNonNull:

@Test
public void testQueryGranularityAndSegmentGranularityNonNull() throws IOException, SegmentLoadingException {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(
            toolbox,
            LockGranularity.TIME_CHUNK,
            new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)),
            new PartitionConfigurationManager(TUNING_CONFIG),
            null,
            null,
            null,
            new ClientCompactionTaskGranularitySpec(
                    new PeriodGranularity(Period.months(3), null, null),
                    new PeriodGranularity(Period.months(3), null, null),
                    null
            ),
            COORDINATOR_CLIENT,
            segmentCacheManagerFactory,
            RETRY_POLICY_FACTORY,
            IOConfig.DEFAULT_DROP_EXISTING
    );
    final List<DimensionsSpec> expectedDimensionsSpec = ImmutableList.of(
            new DimensionsSpec(getDimensionSchema(new DoubleDimensionSchema("string_to_double")))
    );
    ingestionSpecs.sort(
            (s1, s2) -> Comparators.intervalsByStartThenEnd().compare(
                    s1.getDataSchema().getGranularitySpec().inputIntervals().get(0),
                    s2.getDataSchema().getGranularitySpec().inputIntervals().get(0)
            )
    );
    Assert.assertEquals(1, ingestionSpecs.size());
    assertIngestionSchema(
            ingestionSpecs,
            expectedDimensionsSpec,
            AGGREGATORS.stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList()),
            Collections.singletonList(COMPACTION_INTERVAL),
            new PeriodGranularity(Period.months(3), null, null),
            new PeriodGranularity(Period.months(3), null, null),
            IOConfig.DEFAULT_DROP_EXISTING
    );
}
Also used: DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DoubleLastAggregatorFactory(org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) FloatFirstAggregatorFactory(org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) Test(org.junit.Test)
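
A quick illustration of the granularity those arguments describe may help. The following is a minimal sketch, not part of the test above; it assumes only the PeriodGranularity(Period, DateTime, DateTimeZone) constructor and the bucketStart(DateTime) method that other examples on this page use. A P3M granularity with a null origin and null time zone buckets timestamps into calendar quarters in UTC:

import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;

public class QuarterlyGranularitySketch {
    public static void main(String[] args) {
        // Same construction as in the test: 3-month period, no origin, default (UTC) zone.
        PeriodGranularity quarterly = new PeriodGranularity(Period.months(3), null, null);
        DateTime inside = new DateTime("2017-05-20T13:45:00Z", DateTimeZone.UTC);
        // bucketStart floors the instant to the start of its quarter: 2017-04-01T00:00:00.000Z.
        System.out.println(quarterly.bucketStart(inside));
    }
}

Passing the same P3M granularity as both the query and the segment granularity, as the test does, asks compaction to roll data up into quarterly buckets and to cut segments on the same quarterly boundaries.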

Example 32 with PeriodGranularity

Use of org.apache.druid.java.util.common.granularity.PeriodGranularity in project druid by druid-io.

From class MovingAverageQueryRunner, method run:

@Override
public Sequence<Row> run(QueryPlus<Row> query, ResponseContext responseContext) {
    MovingAverageQuery maq = (MovingAverageQuery) query.getQuery();
    List<Interval> intervals;
    final Period period;
    // Get the largest bucket from the list of averagers
    Optional<Integer> opt = maq.getAveragerSpecs().stream().map(AveragerFactory::getNumBuckets).max(Integer::compare);
    int buckets = opt.orElse(0);
    // Extend the start of each interval backward by (buckets - 1) periods so the first
    // reported bucket has a full window of history behind it
    if (maq.getGranularity() instanceof PeriodGranularity) {
        period = ((PeriodGranularity) maq.getGranularity()).getPeriod();
        int offset = buckets <= 0 ? 0 : (1 - buckets);
        intervals = maq.getIntervals()
                       .stream()
                       .map(i -> new Interval(i.getStart().withPeriodAdded(period, offset), i.getEnd()))
                       .collect(Collectors.toList());
    } else {
        throw new ISE("Only PeriodGranularity is supported for movingAverage queries");
    }
    Sequence<Row> resultsSeq;
    DataSource dataSource = maq.getDataSource();
    if (maq.getDimensions() != null && !maq.getDimensions().isEmpty()
            && (dataSource instanceof TableDataSource
                || dataSource instanceof UnionDataSource
                || dataSource instanceof QueryDataSource)) {
        // build groupBy query from movingAverage query
        GroupByQuery.Builder builder = GroupByQuery.builder()
                .setDataSource(dataSource)
                .setInterval(intervals)
                .setDimFilter(maq.getFilter())
                .setGranularity(maq.getGranularity())
                .setDimensions(maq.getDimensions())
                .setAggregatorSpecs(maq.getAggregatorSpecs())
                .setPostAggregatorSpecs(maq.getPostAggregatorSpecs())
                .setContext(maq.getContext());
        GroupByQuery gbq = builder.build();
        ResponseContext gbqResponseContext = ResponseContext.createEmpty();
        gbqResponseContext.merge(responseContext);
        gbqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(gbq));
        Sequence<ResultRow> results = gbq.getRunner(walker).run(QueryPlus.wrap(gbq), gbqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(
                    gbq,
                    DateTimes.nowUtc(),
                    "127.0.0.1",
                    new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))
            ));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = results.map(row -> row.toMapBasedRow(gbq));
    } else {
        // no dimensions, so optimize this as a TimeSeries
        TimeseriesQuery tsq = new TimeseriesQuery(
                dataSource,
                new MultipleIntervalSegmentSpec(intervals),
                false,
                null,
                maq.getFilter(),
                maq.getGranularity(),
                maq.getAggregatorSpecs(),
                maq.getPostAggregatorSpecs(),
                0,
                maq.getContext()
        );
        ResponseContext tsqResponseContext = ResponseContext.createEmpty();
        tsqResponseContext.merge(responseContext);
        tsqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(tsq));
        Sequence<Result<TimeseriesResultValue>> results = tsq.getRunner(walker).run(QueryPlus.wrap(tsq), tsqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(
                    tsq,
                    DateTimes.nowUtc(),
                    "127.0.0.1",
                    new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))
            ));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = Sequences.map(results, new TimeseriesResultToRow());
    }
    // Process into period buckets
    Sequence<RowBucket> bucketedMovingAvgResults =
            Sequences.simple(new RowBucketIterable(resultsSeq, intervals, period));
    // Apply the window analysis (averager) functions
    Sequence<Row> movingAvgResults = Sequences.simple(new MovingAverageIterable(
            bucketedMovingAvgResults,
            maq.getDimensions(),
            maq.getAveragerSpecs(),
            maq.getPostAggregatorSpecs(),
            maq.getAggregatorSpecs()
    ));
    // Apply any postAveragers
    Sequence<Row> movingAvgResultsWithPostAveragers =
            Sequences.map(movingAvgResults, new PostAveragerAggregatorCalculator(maq));
    // Remove rows outside the reporting window (the interval starts were extended above)
    List<Interval> reportingIntervals = maq.getIntervals();
    movingAvgResults = Sequences.filter(
            movingAvgResultsWithPostAveragers,
            row -> reportingIntervals.stream().anyMatch(i -> i.contains(row.getTimestamp()))
    );
    // Apply any having, sorting, and limits
    movingAvgResults = maq.applyLimit(movingAvgResults);
    return movingAvgResults;
}
Also used: QueryPlus(org.apache.druid.query.QueryPlus) MapBasedRow(org.apache.druid.data.input.MapBasedRow) AveragerFactory(org.apache.druid.query.movingaverage.averagers.AveragerFactory) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) Row(org.apache.druid.data.input.Row) QueryStats(org.apache.druid.server.QueryStats) Interval(org.joda.time.Interval) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Period(org.joda.time.Period) Function(com.google.common.base.Function) ImmutableMap(com.google.common.collect.ImmutableMap) ResponseContext(org.apache.druid.query.context.ResponseContext) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) Throwables(com.google.common.base.Throwables) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) RequestLogger(org.apache.druid.server.log.RequestLogger) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) Result(org.apache.druid.query.Result) List(java.util.List) UnionDataSource(org.apache.druid.query.UnionDataSource) RequestLogLine(org.apache.druid.server.RequestLogLine) Optional(java.util.Optional)
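
The interval arithmetic at the top of run() is the heart of the moving-average setup: if the largest averager needs N buckets, each interval's start is pulled back by N - 1 periods so the first reported bucket already has a full window of history. A minimal sketch of just that step, using plain Joda-Time (the variable values here are illustrative, not from the Druid source):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
import org.joda.time.Period;

public class IntervalExtensionSketch {
    public static void main(String[] args) {
        Period period = Period.days(1);                  // bucket size, as from a P1D PeriodGranularity
        int buckets = 7;                                 // largest averager window, e.g. a 7-day moving average
        int offset = buckets <= 0 ? 0 : (1 - buckets);   // same guard as in run() above

        Interval reporting = new Interval(
                new DateTime("2017-10-10T00:00Z", DateTimeZone.UTC),
                new DateTime("2017-10-12T00:00Z", DateTimeZone.UTC)
        );
        // Pull the start back by (buckets - 1) periods; the end stays put.
        Interval extended = new Interval(reporting.getStart().withPeriodAdded(period, offset), reporting.getEnd());
        System.out.println(extended);  // 2017-10-04T00:00:00.000Z/2017-10-12T00:00:00.000Z
    }
}

This is also why the Sequences.filter step near the end of run() drops rows outside maq.getIntervals(): the extended head of each interval exists only to warm up the averagers and should not appear in the results.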

Example 33 with PeriodGranularity

Use of org.apache.druid.java.util.common.granularity.PeriodGranularity in project druid by druid-io.

From class ThetaSketchSqlAggregatorTest, method testAvgDailyCountDistinctThetaSketch:

@Test
public void testAvgDailyCountDistinctThetaSketch() throws Exception {
    // Can't vectorize due to outer query (it operates on an inlined data source, which cannot be vectorized).
    cannotVectorize();
    final List<Object[]> expectedResults = ImmutableList.of(new Object[] { 1L });
    testQuery("SELECT\n" + "  AVG(u)\n" + "FROM (SELECT FLOOR(__time TO DAY), APPROX_COUNT_DISTINCT_DS_THETA(cnt) AS u FROM druid.foo GROUP BY 1)", ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(new PeriodGranularity(Period.days(1), null, DateTimeZone.UTC)).aggregators(Collections.singletonList(new SketchMergeAggregatorFactory("a0:a", "cnt", null, null, null, null))).postAggregators(ImmutableList.of(new FinalizingFieldAccessPostAggregator("a0", "a0:a"))).context(TIMESERIES_CONTEXT_BY_GRAN).build().withOverriddenContext(BaseCalciteQueryTest.getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")))).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setAggregatorSpecs(NullHandling.replaceWithDefault() ? Arrays.asList(new LongSumAggregatorFactory("_a0:sum", "a0"), new CountAggregatorFactory("_a0:count")) : Arrays.asList(new LongSumAggregatorFactory("_a0:sum", "a0"), new FilteredAggregatorFactory(new CountAggregatorFactory("_a0:count"), BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("a0", null, null))))).setPostAggregatorSpecs(ImmutableList.of(new ArithmeticPostAggregator("_a0", "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, "_a0:sum"), new FieldAccessPostAggregator(null, "_a0:count"))))).setContext(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Also used: FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) SketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory) ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FinalizingFieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) QueryDataSource(org.apache.druid.query.QueryDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
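
The inner timeseries query's granularity is worth calling out: new PeriodGranularity(Period.days(1), null, DateTimeZone.UTC) is the native form that the SQL expression FLOOR(__time TO DAY) plans into, as the expected query above shows. A minimal sketch of that equivalence, assuming only the constructor and the bucketStart(DateTime) method used elsewhere on this page:

import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;

public class DailyFloorSketch {
    public static void main(String[] args) {
        PeriodGranularity daily = new PeriodGranularity(Period.days(1), null, DateTimeZone.UTC);
        // Like FLOOR(__time TO DAY): any instant in the day maps to that day's midnight in UTC.
        System.out.println(daily.bucketStart(new DateTime("2001-01-02T15:04:05Z", DateTimeZone.UTC)));
        // Prints 2001-01-02T00:00:00.000Z
    }
}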

Example 34 with PeriodGranularity

Use of org.apache.druid.java.util.common.granularity.PeriodGranularity in project druid by druid-io.

From class GranularityTest, method testIncrementOverSpringForward:

// Regression test for https://github.com/apache/druid/issues/5200.
@Test
public void testIncrementOverSpringForward() {
    // Sao Paulo daylight savings time in 2017 starts at midnight. When we spring forward, 00:00:00 doesn't exist.
    // (The clock goes straight from 23:59:59 to 01:00:00.) This test verifies we handle the case correctly while
    // iterating through Paulistano days.
    final DateTimeZone saoPaulo = DateTimes.inferTzFromString("America/Sao_Paulo");
    final PeriodGranularity granSaoPauloDay = new PeriodGranularity(Period.days(1), null, saoPaulo);
    final Iterable<Interval> intervals = granSaoPauloDay.getIterable(
            new Interval(new DateTime("2017-10-14", saoPaulo), new DateTime("2017-10-17", saoPaulo))
    );
    // Similar to what query engines do: call granularity.bucketStart on the datetimes returned by their cursors.
    // (And the cursors, in turn, use getIterable like above.)
    Assert.assertEquals(
            ImmutableList.of(
                    new DateTime("2017-10-14", saoPaulo),
                    new DateTime("2017-10-15T01", saoPaulo),
                    new DateTime("2017-10-16", saoPaulo)
            ),
            StreamSupport.stream(intervals.spliterator(), false)
                         .map(interval -> granSaoPauloDay.bucketStart(interval.getStart()))
                         .collect(Collectors.toList())
    );
}
Also used: PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) DateTimeZone(org.joda.time.DateTimeZone) DateTime(org.joda.time.DateTime) Interval(org.joda.time.Interval) Test(org.junit.Test)
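
The behavior being pinned down is that daily bucketing in America/Sao_Paulo must not produce the nonexistent local midnight of 2017-10-15; that day's bucket starts at 01:00 instead. A minimal sketch of the same behavior on a single timestamp, consistent with the assertion above but not the exact regression test:

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;

public class SpringForwardSketch {
    public static void main(String[] args) {
        DateTimeZone saoPaulo = DateTimes.inferTzFromString("America/Sao_Paulo");
        PeriodGranularity day = new PeriodGranularity(Period.days(1), null, saoPaulo);
        // Midnight on 2017-10-15 does not exist in Sao Paulo (clocks jump from 23:59:59 to 01:00:00),
        // so the day's bucket starts at the first valid instant, 01:00 local time.
        System.out.println(day.bucketStart(new DateTime("2017-10-15T12:00", saoPaulo)));
        // Prints 2017-10-15T01:00:00.000-02:00
    }
}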

Example 35 with PeriodGranularity

Use of org.apache.druid.java.util.common.granularity.PeriodGranularity in project druid by druid-io.

From class GranularityTest, method testCustomPeriodToDate:

@Test
public void testCustomPeriodToDate() {
    PathDate[] customChecks = {
        new PathDate(
                new DateTime(2011, 3, 15, 20, 50, 42, 0, ISOChronology.getInstanceUTC()),
                null,
                "y=2011/m=03/d=15/H=20/M=50/S=43/Test0"
        ),
        new PathDate(
                new DateTime(2011, 3, 15, 20, 50, 42, 0, ISOChronology.getInstanceUTC()),
                null,
                "/y=2011/m=03/d=15/H=20/M=50/S=43/Test0"
        ),
        new PathDate(
                new DateTime(2011, 3, 15, 20, 50, 42, 0, ISOChronology.getInstanceUTC()),
                null,
                "valid/y=2011/m=03/d=15/H=20/M=50/S=43/Test1"
        )
    };
    checkToDate(new PeriodGranularity(new Period("PT2S"), null, DateTimeZone.UTC), Granularity.Formatter.DEFAULT, customChecks);
}
Also used: PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Period(org.joda.time.Period) DateTime(org.joda.time.DateTime) Test(org.junit.Test)
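
Note the intent of these checks: the paths are stamped S=43, yet the expected DateTime ends in second 42, because a PT2S granularity snaps a parsed timestamp to the start of its 2-second bucket. A minimal sketch of the underlying rounding (checkToDate and PathDate are test helpers, so this shows the behavior directly via bucketStart instead):

import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.joda.time.chrono.ISOChronology;

public class TwoSecondBucketSketch {
    public static void main(String[] args) {
        PeriodGranularity twoSeconds = new PeriodGranularity(new Period("PT2S"), null, DateTimeZone.UTC);
        DateTime parsed = new DateTime(2011, 3, 15, 20, 50, 43, 0, ISOChronology.getInstanceUTC());
        // Second 43 falls in the bucket that opens at second 42, so it floors to :42.
        System.out.println(twoSeconds.bucketStart(parsed));
        // Prints 2011-03-15T20:50:42.000Z
    }
}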

Aggregations

PeriodGranularity (org.apache.druid.java.util.common.granularity.PeriodGranularity): 65 uses
Test (org.junit.Test): 57 uses
Period (org.joda.time.Period): 52 uses
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 33 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 27 uses
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 24 uses
DateTime (org.joda.time.DateTime): 13 uses
QueryRunner (org.apache.druid.query.QueryRunner): 11 uses
Result (org.apache.druid.query.Result): 11 uses
ArrayList (java.util.ArrayList): 10 uses
FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner): 10 uses
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 10 uses
ResponseContext (org.apache.druid.query.context.ResponseContext): 9 uses
DateTimeZone (org.joda.time.DateTimeZone): 9 uses
ChainedExecutionQueryRunner (org.apache.druid.query.ChainedExecutionQueryRunner): 8 uses
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 8 uses
Sequence (org.apache.druid.java.util.common.guava.Sequence): 7 uses
QueryPlus (org.apache.druid.query.QueryPlus): 7 uses
Interval (org.joda.time.Interval): 7 uses
QueryDataSource (org.apache.druid.query.QueryDataSource): 6 uses