Example 51 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

In the class GroupByQueryHelper, the method createIndexAccumulatorPair:

public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, @Nullable final GroupByQuery subquery, final GroupByQueryConfig config) {
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final Granularity gran = query.getGranularity();
    final long timeStart = query.getIntervals().get(0).getStartMillis();
    final boolean combine = subquery == null;
    long granTimeStart = timeStart;
    if (!(Granularities.ALL.equals(gran))) {
        granTimeStart = gran.bucketStart(timeStart);
    }
    final List<AggregatorFactory> aggs;
    if (combine) {
        aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {

            @Override
            public AggregatorFactory apply(AggregatorFactory input) {
                return input.getCombiningFactory();
            }
        });
    } else {
        aggs = query.getAggregatorSpecs();
    }
    final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec input) {
            return input.getOutputName();
        }
    });
    final IncrementalIndex index;
    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
    for (DimensionSpec dimension : query.getDimensions()) {
        dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
    }
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder().withDimensionsSpec(new DimensionsSpec(dimensionSchemas)).withMetrics(aggs.toArray(new AggregatorFactory[0])).withQueryGranularity(gran).withMinTimestamp(granTimeStart).build();
    final AppendableIndexBuilder indexBuilder;
    if (query.getContextValue("useOffheap", false)) {
        throw new UnsupportedOperationException("The 'useOffheap' option is no longer available for groupBy v1. Please move to the newer groupBy engine, " + "which always operates off-heap, by removing any custom 'druid.query.groupBy.defaultStrategy' runtime " + "properties and 'groupByStrategy' query context parameters that you have set.");
    } else {
        indexBuilder = new OnheapIncrementalIndex.Builder();
    }
    index = indexBuilder.setIndexSchema(indexSchema).setDeserializeComplexMetrics(false).setConcurrentEventAdd(true).setSortFacts(sortResults).setMaxRowCount(querySpecificConfig.getMaxResults()).build();
    Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {

        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            final MapBasedRow mapBasedRow;
            if (in instanceof MapBasedRow) {
                mapBasedRow = (MapBasedRow) in;
            } else if (in instanceof ResultRow) {
                final ResultRow row = (ResultRow) in;
                mapBasedRow = row.toMapBasedRow(combine ? query : subquery);
            } else {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }
            try {
                accumulated.add(new MapBasedInputRow(mapBasedRow.getTimestamp(), dimensions, mapBasedRow.getEvent()));
            } catch (IndexSizeExceededException e) {
                throw new ResourceLimitExceededException(e.getMessage());
            }
            return accumulated;
        }
    };
    return new Pair<>(index, accumulator);
}
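
A minimal usage sketch for the returned pair, assuming a Druid Sequence of ResultRow named results plus prebuilt query and config objects (those three names are hypothetical, not part of the source above):

// Hypothetical driver code; "results", "query", and "config" are assumed to exist.
final Pair<IncrementalIndex, Accumulator<IncrementalIndex, ResultRow>> pair =
        GroupByQueryHelper.createIndexAccumulatorPair(query, null, config);
// Sequence.accumulate() pushes each ResultRow through the accumulator, which
// converts it to a MapBasedRow and adds it to the incremental index.
final IncrementalIndex index = results.accumulate(pair.lhs, pair.rhs);
// ... read the grouped rows back out, then release the index ...
index.close();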
Also used: Accumulator(org.apache.druid.java.util.common.guava.Accumulator) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) AppendableIndexBuilder(org.apache.druid.segment.incremental.AppendableIndexBuilder) ArrayList(java.util.ArrayList) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) Granularity(org.apache.druid.java.util.common.granularity.Granularity) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) ISE(org.apache.druid.java.util.common.ISE) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) Pair(org.apache.druid.java.util.common.Pair) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)

Example 52 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

In the class ApproximateHistogramAggregationTest, the method testIngestWithNullsIgnoredAndQuery:

@Test
public void testIngestWithNullsIgnoredAndQuery() throws Exception {
    MapBasedRow row = ingestAndQuery(true);
    Assert.assertEquals(92.782760, row.getMetric("index_min").floatValue(), 0.0001);
    Assert.assertEquals(135.109191, row.getMetric("index_max").floatValue(), 0.0001);
    Assert.assertEquals(133.69340, row.getMetric("index_quantile").floatValue(), 0.0001);
    Assert.assertEquals(new Quantiles(new float[] { 0.2f, 0.7f }, new float[] { 92.78276f, 103.195305f }, 92.78276f, 135.109191f), row.getRaw("index_quantiles"));
    Assert.assertEquals("Histogram{breaks=[92.0, 94.0, 96.0, 98.0, 100.0, 106.0, 108.0, 134.0, 136.0], counts=[1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0]}", row.getRaw("index_buckets").toString());
    Assert.assertEquals("Histogram{breaks=[50.0, 100.0], counts=[3.0]}", row.getRaw("index_custom").toString());
    Assert.assertEquals("Histogram{breaks=[71.61954498291016, 92.78276062011719, 113.94597625732422, 135.10919189453125], counts=[1.0, 3.0, 1.0]}", row.getRaw("index_equal").toString());
}
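
For context, a MapBasedRow is just a timestamp plus an event map, so a row shaped like the one ingestAndQuery() returns can be built by hand. The timestamp and metric value below are invented for illustration:

// Illustrative construction; the timestamp and metric value are made up.
final Map<String, Object> event = new HashMap<>();
event.put("index_min", 92.78276f);
final MapBasedRow row = new MapBasedRow(DateTimes.of("2011-04-15T00:00:00.000Z"), event);
// getMetric() returns the event value coerced to a Number.
Assert.assertEquals(92.78276f, row.getMetric("index_min").floatValue(), 0.0001);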
Also used: MapBasedRow(org.apache.druid.data.input.MapBasedRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)

Example 53 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

In the class ApproximateHistogramAggregationTest, the method testIngestWithNullsToZeroAndQuery:

@Test
public void testIngestWithNullsToZeroAndQuery() throws Exception {
    // This is already tested in testIngestWithNullsIgnoredAndQuery()
    if (NullHandling.replaceWithDefault()) {
        MapBasedRow row = ingestAndQuery(false);
        Assert.assertEquals(0.0F, row.getMetric("index_min"));
        Assert.assertEquals(135.109191, row.getMetric("index_max").floatValue(), 0.0001);
        Assert.assertEquals(131.428176, row.getMetric("index_quantile").floatValue(), 0.0001);
        Assert.assertEquals(new Quantiles(new float[] { 0.2f, 0.7f }, new float[] { 0.0f, 92.95146f }, 0.0f, 135.109191f), row.getRaw("index_quantiles"));
        Assert.assertEquals("Histogram{breaks=[-2.0, 92.0, 94.0, 96.0, 98.0, 100.0, 106.0, 108.0, 134.0, 136.0], counts=[8.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0]}", row.getRaw("index_buckets").toString());
        Assert.assertEquals("Histogram{breaks=[50.0, 100.0], counts=[3.0]}", row.getRaw("index_custom").toString());
        Assert.assertEquals("Histogram{breaks=[-67.55459594726562, 0.0, 67.55459594726562, 135.10919189453125], counts=[8.0, 0.0, 5.0]}", row.getRaw("index_equal").toString());
    }
}
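
NullHandling.replaceWithDefault() is true in Druid's default-value mode (druid.generic.useDefaultValueForNull=true), where numeric nulls are ingested as 0; the guard skips the body in SQL-compatible mode, which the previous test already covers. The mode is initialized by the InitializedNullHandlingTest base class listed in the imports, which does roughly the following (a from-memory sketch, not verbatim source):

public class InitializedNullHandlingTest
{
    static {
        // Force NullHandling.replaceWithDefault() to reflect a configured
        // mode before any test method runs.
        NullHandling.initializeForTests();
    }
}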
Also used: MapBasedRow(org.apache.druid.data.input.MapBasedRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest)

Example 54 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

In the class TimewarpOperator, the method postProcess:

public QueryRunner<T> postProcess(final QueryRunner<T> baseRunner, final long now) {
    return new QueryRunner<T>() {

        @Override
        public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext responseContext) {
            final DateTimeZone tz = queryPlus.getQuery().getTimezone();
            final long offset = computeOffset(now, tz);
            final Interval interval = queryPlus.getQuery().getIntervals().get(0);
            final Interval modifiedInterval = new Interval(Math.min(interval.getStartMillis() + offset, now + offset), Math.min(interval.getEndMillis() + offset, now + offset), interval.getChronology());
            return Sequences.map(baseRunner.run(queryPlus.withQuery(queryPlus.getQuery().withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(modifiedInterval)))), responseContext), new Function<T, T>() {

                @Override
                public T apply(T input) {
                    if (input instanceof Result) {
                        Result res = (Result) input;
                        Object value = res.getValue();
                        if (value instanceof TimeBoundaryResultValue) {
                            TimeBoundaryResultValue boundary = (TimeBoundaryResultValue) value;
                            DateTime minTime;
                            try {
                                minTime = boundary.getMinTime();
                            } catch (IllegalArgumentException e) {
                                minTime = null;
                            }
                            final DateTime maxTime = boundary.getMaxTime();
                            return (T) ((TimeBoundaryQuery) queryPlus.getQuery()).buildResult(DateTimes.utc(Math.min(res.getTimestamp().getMillis() - offset, now)), minTime != null ? minTime.minus(offset) : null, maxTime != null ? DateTimes.utc(Math.min(maxTime.getMillis() - offset, now)) : null).iterator().next();
                        }
                        return (T) new Result(res.getTimestamp().minus(offset), value);
                    } else if (input instanceof MapBasedRow) {
                        MapBasedRow row = (MapBasedRow) input;
                        return (T) new MapBasedRow(row.getTimestamp().minus(offset), row.getEvent());
                    }
                    // default to noop for unknown result types
                    return input;
                }
            });
        }
    };
}
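
A wiring sketch, loosely following the pattern used in TimewarpOperatorTest; the data interval, period, origin, and "now" values are illustrative, and baseRunner is assumed to exist:

// Illustrative values; the operator maps the query interval forward by a
// period-aligned offset and shifts result timestamps back by the same offset.
final TimewarpOperator<Result<TimeseriesResultValue>> operator = new TimewarpOperator<>(
        new Interval(DateTimes.of("2014-01-01"), DateTimes.of("2014-01-15")), // data interval
        new Period("P1W"), // warp period
        DateTimes.of("2014-01-06") // origin the offset is computed against
);
final QueryRunner<Result<TimeseriesResultValue>> warped =
        operator.postProcess(baseRunner, DateTimes.of("2014-08-02").getMillis());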
Also used: MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) TimeBoundaryQuery(org.apache.druid.query.timeboundary.TimeBoundaryQuery) DateTimeZone(org.joda.time.DateTimeZone) DateTime(org.joda.time.DateTime) MapBasedRow(org.apache.druid.data.input.MapBasedRow) ResponseContext(org.apache.druid.query.context.ResponseContext) TimeBoundaryResultValue(org.apache.druid.query.timeboundary.TimeBoundaryResultValue) Interval(org.joda.time.Interval)

Example 55 with MapBasedRow

Use of org.apache.druid.data.input.MapBasedRow in project druid by druid-io.

In the class OnheapIncrementalIndex, the method iterableWithPostAggregations:

@Override
public Iterable<Row> iterableWithPostAggregations(@Nullable final List<PostAggregator> postAggs, final boolean descending) {
    final AggregatorFactory[] metrics = getMetricAggs();
    return () -> {
        final List<DimensionDesc> dimensions = getDimensions();
        return Iterators.transform(getFacts().iterator(descending), incrementalIndexRow -> {
            final int rowOffset = incrementalIndexRow.getRowIndex();
            Object[] theDims = incrementalIndexRow.getDims();
            Map<String, Object> theVals = Maps.newLinkedHashMap();
            for (int i = 0; i < theDims.length; ++i) {
                Object dim = theDims[i];
                DimensionDesc dimensionDesc = dimensions.get(i);
                if (dimensionDesc == null) {
                    continue;
                }
                String dimensionName = dimensionDesc.getName();
                DimensionHandler handler = dimensionDesc.getHandler();
                if (dim == null || handler.getLengthOfEncodedKeyComponent(dim) == 0) {
                    theVals.put(dimensionName, null);
                    continue;
                }
                final DimensionIndexer indexer = dimensionDesc.getIndexer();
                Object rowVals = indexer.convertUnsortedEncodedKeyComponentToActualList(dim);
                theVals.put(dimensionName, rowVals);
            }
            Aggregator[] aggs = getAggsForRow(rowOffset);
            for (int i = 0; i < aggs.length; ++i) {
                theVals.put(metrics[i].getName(), aggs[i].get());
            }
            if (postAggs != null) {
                for (PostAggregator postAgg : postAggs) {
                    theVals.put(postAgg.getName(), postAgg.compute(theVals));
                }
            }
            return new MapBasedRow(incrementalIndexRow.getTimestamp(), theVals);
        });
    };
}
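
A usage sketch; index here stands for an OnheapIncrementalIndex that already holds data (the variable name and the printing are illustrative):

// Hypothetical usage; every element this iterable produces is a MapBasedRow.
for (Row row : index.iterableWithPostAggregations(null, false)) {
    final MapBasedRow mapBasedRow = (MapBasedRow) row;
    System.out.println(mapBasedRow.getTimestamp() + " " + mapBasedRow.getEvent());
}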
Also used: DimensionHandler(org.apache.druid.segment.DimensionHandler) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) Aggregator(org.apache.druid.query.aggregation.Aggregator) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) MapBasedRow(org.apache.druid.data.input.MapBasedRow) DimensionIndexer(org.apache.druid.segment.DimensionIndexer)

Aggregations

MapBasedRow (org.apache.druid.data.input.MapBasedRow): 65
Test (org.junit.Test): 50
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 36
ArrayList (java.util.ArrayList): 21
Row (org.apache.druid.data.input.Row): 16
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 16
GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest): 16
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 15
HashMap (java.util.HashMap): 13
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 12
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 10
List (java.util.List): 9
ResultRow (org.apache.druid.query.groupby.ResultRow): 9
LongMeanAveragerFactory (org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory): 9
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 8
File (java.io.File): 7
ByteBuffer (java.nio.ByteBuffer): 6
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig): 6
TimeseriesResultValue (org.apache.druid.query.timeseries.TimeseriesResultValue): 6
IOException (java.io.IOException): 5