Search in sources:

Example 6 with MapBasedRow

use of io.druid.data.input.MapBasedRow in project druid by druid-io.

In the class GroupByQueryHelper, the method createIndexAccumulatorPair:

/**
 * Builds an {@link IncrementalIndex} configured for the given groupBy query, paired with an
 * {@link Accumulator} that folds {@link MapBasedRow} results into that index.
 *
 * @param query      the groupBy query supplying granularity, intervals, dimensions and aggregators
 * @param config     base config; per-query overrides are applied via {@code withOverrides}
 * @param bufferPool backing buffers, used only when the "useOffheap" context flag is set
 * @param combine    when true, aggregators are swapped for their combining factories
 *                   (inputs are assumed to be partial aggregates rather than raw rows)
 */
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, final GroupByQueryConfig config, StupidPool<ByteBuffer> bufferPool, final boolean combine) {
    final GroupByQueryConfig effectiveConfig = config.withOverrides(query);
    final Granularity granularity = query.getGranularity();
    final long queryStartMillis = query.getIntervals().get(0).getStartMillis();
    // Align the index's minimum timestamp to the granularity bucket containing the query start,
    // unless granularity is ALL (no bucketing).
    final long minTimestamp = Granularities.ALL.equals(granularity)
            ? queryStartMillis
            : granularity.bucketStart(new DateTime(queryStartMillis)).getMillis();
    final List<AggregatorFactory> metrics;
    if (combine) {
        metrics = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {

            @Override
            public AggregatorFactory apply(AggregatorFactory input) {
                return input.getCombiningFactory();
            }
        });
    } else {
        metrics = query.getAggregatorSpecs();
    }
    // Output names of the query's dimensions, in query order; captured by the accumulator below.
    final List<String> dimensionNames = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec input) {
            return input.getOutputName();
        }
    });
    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> dimensionSchemas = Lists.newArrayList();
    for (DimensionSpec dimension : query.getDimensions()) {
        dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
    }
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
            .withDimensionsSpec(new DimensionsSpec(dimensionSchemas, null, null))
            .withMetrics(metrics.toArray(new AggregatorFactory[metrics.size()]))
            .withQueryGranularity(granularity)
            .withMinTimestamp(minTimestamp)
            .build();
    // Off-heap vs on-heap index is chosen per query via the "useOffheap" context flag.
    final IncrementalIndex index = query.getContextValue("useOffheap", false)
            ? new OffheapIncrementalIndex(indexSchema, false, true, sortResults, effectiveConfig.getMaxResults(), bufferPool)
            : new OnheapIncrementalIndex(indexSchema, false, true, sortResults, effectiveConfig.getMaxResults());
    final Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {

        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            // Only MapBasedRow inputs are supported; anything else is a programming error.
            if (!(in instanceof MapBasedRow)) {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }
            final MapBasedRow row = (MapBasedRow) in;
            try {
                accumulated.add(new MapBasedInputRow(row.getTimestamp(), dimensionNames, row.getEvent()));
            } catch (IndexSizeExceededException e) {
                // Surface index overflow as a resource-limit violation to the caller.
                throw new ResourceLimitExceededException(e.getMessage());
            }
            return accumulated;
        }
    };
    return new Pair<>(index, accumulator);
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) DimensionSpec(io.druid.query.dimension.DimensionSpec) OffheapIncrementalIndex(io.druid.segment.incremental.OffheapIncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) Granularity(io.druid.java.util.common.granularity.Granularity) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DimensionSchema(io.druid.data.input.impl.DimensionSchema) DateTime(org.joda.time.DateTime) MapBasedRow(io.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) ISE(io.druid.java.util.common.ISE) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema) Pair(io.druid.java.util.common.Pair) OffheapIncrementalIndex(io.druid.segment.incremental.OffheapIncrementalIndex) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) ResourceLimitExceededException(io.druid.query.ResourceLimitExceededException) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException)

Example 7 with MapBasedRow

use of io.druid.data.input.MapBasedRow in project druid by druid-io.

In the class GroupByQueryQueryToolChest, the method getCacheStrategy:

/**
 * Cache strategy for groupBy results: each {@link MapBasedRow} is flattened into a compact
 * list form for cache storage and reconstructed into a {@link MapBasedRow} when read back.
 */
@Override
public CacheStrategy<Row, Object, GroupByQuery> getCacheStrategy(final GroupByQuery query) {
    return new CacheStrategy<Row, Object, GroupByQuery>() {

        // Version byte in the cache key; bump when the serialized row layout changes so that
        // stale entries are invalidated.
        private static final byte CACHE_STRATEGY_VERSION = 0x1;

        // Captured once from the enclosing query; prepareForCache/pullFromCache rely on these
        // lists to define the fixed order of values in the cached representation.
        private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();

        private final List<DimensionSpec> dims = query.getDimensions();

        @Override
        public boolean isCacheable(GroupByQuery query, boolean willMergeRunners) {
            // Cacheability is delegated to the strategy selected for this query.
            return strategySelector.strategize(query).isCacheable(willMergeRunners);
        }

        @Override
        public byte[] computeCacheKey(GroupByQuery query) {
            // Aggregators and dimensions are appended order-insensitively so logically-equal
            // queries that list them in a different order share a cache key.
            return new CacheKeyBuilder(GROUPBY_QUERY).appendByte(CACHE_STRATEGY_VERSION).appendCacheable(query.getGranularity()).appendCacheable(query.getDimFilter()).appendCacheablesIgnoringOrder(query.getAggregatorSpecs()).appendCacheablesIgnoringOrder(query.getDimensions()).appendCacheable(query.getVirtualColumns()).build();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Row, Object> prepareForCache() {
            // Serializes a MapBasedRow as a flat list:
            // [timestampMillis, dim values (in dims order)..., agg values (in aggs order)...].
            return new Function<Row, Object>() {

                @Override
                public Object apply(Row input) {
                    if (input instanceof MapBasedRow) {
                        final MapBasedRow row = (MapBasedRow) input;
                        final List<Object> retVal = Lists.newArrayListWithCapacity(1 + dims.size() + aggs.size());
                        retVal.add(row.getTimestamp().getMillis());
                        Map<String, Object> event = row.getEvent();
                        for (DimensionSpec dim : dims) {
                            retVal.add(event.get(dim.getOutputName()));
                        }
                        for (AggregatorFactory agg : aggs) {
                            retVal.add(event.get(agg.getName()));
                        }
                        return retVal;
                    }
                    throw new ISE("Don't know how to cache input rows of type[%s]", input.getClass());
                }
            };
        }

        @Override
        public Function<Object, Row> pullFromCache() {
            // Inverse of prepareForCache: rebuilds a MapBasedRow from the flat list,
            // deserializing aggregator values via their factories.
            return new Function<Object, Row>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Row apply(Object input) {
                    Iterator<Object> results = ((List<Object>) input).iterator();
                    // First element is the bucket timestamp in millis.
                    DateTime timestamp = granularity.toDateTime(((Number) results.next()).longValue());
                    // LinkedHashMap keeps dim-then-agg insertion order in the rebuilt event map.
                    Map<String, Object> event = Maps.newLinkedHashMap();
                    Iterator<DimensionSpec> dimsIter = dims.iterator();
                    while (dimsIter.hasNext() && results.hasNext()) {
                        final DimensionSpec factory = dimsIter.next();
                        event.put(factory.getOutputName(), results.next());
                    }
                    Iterator<AggregatorFactory> aggsIter = aggs.iterator();
                    while (aggsIter.hasNext() && results.hasNext()) {
                        final AggregatorFactory factory = aggsIter.next();
                        event.put(factory.getName(), factory.deserialize(results.next()));
                    }
                    // Sanity check: the cached list length must be exactly 1 + dims + aggs.
                    if (dimsIter.hasNext() || aggsIter.hasNext() || results.hasNext()) {
                        throw new ISE("Found left over objects while reading from cache!! dimsIter[%s] aggsIter[%s] results[%s]", dimsIter.hasNext(), aggsIter.hasNext(), results.hasNext());
                    }
                    return new MapBasedRow(timestamp, event);
                }
            };
        }
    };
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) CacheKeyBuilder(io.druid.query.cache.CacheKeyBuilder) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Granularity(io.druid.java.util.common.granularity.Granularity) DateTime(org.joda.time.DateTime) MapBasedRow(io.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) ArrayList(java.util.ArrayList) List(java.util.List) ISE(io.druid.java.util.common.ISE) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow) CacheStrategy(io.druid.query.CacheStrategy)

Example 8 with MapBasedRow

use of io.druid.data.input.MapBasedRow in project druid by druid-io.

In the class GroupByQueryQueryToolChest, the method makePreComputeManipulatorFn:

/**
 * Returns a function that applies {@code fn} to every aggregator value of a {@link MapBasedRow},
 * producing a new row with the manipulated values. Non-MapBasedRow inputs pass through unchanged,
 * and the identity manipulation short-circuits to {@code Functions.identity()}.
 */
@Override
public Function<Row, Row> makePreComputeManipulatorFn(final GroupByQuery query, final MetricManipulationFn fn) {
    // Nothing to rewrite for the identity manipulation.
    if (MetricManipulatorFns.identity().equals(fn)) {
        return Functions.identity();
    }
    return new Function<Row, Row>() {

        @Override
        public Row apply(Row input) {
            // Rows of any other type are returned untouched.
            if (!(input instanceof MapBasedRow)) {
                return input;
            }
            final MapBasedRow row = (MapBasedRow) input;
            final Map<String, Object> manipulated = Maps.newHashMap(row.getEvent());
            // Overwrite each aggregator's value with its manipulated form.
            for (AggregatorFactory factory : query.getAggregatorSpecs()) {
                final String name = factory.getName();
                manipulated.put(name, fn.manipulate(factory, row.getEvent().get(name)));
            }
            return new MapBasedRow(row.getTimestamp(), manipulated);
        }
    };
}
Also used : MapBasedRow(io.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)

Example 9 with MapBasedRow

use of io.druid.data.input.MapBasedRow in project druid by druid-io.

In the class GroupByQueryQueryToolChest, the method makePostComputeManipulatorFn:

/**
 * Returns a row-manipulation function that first applies the pre-compute manipulation and then,
 * for dimensions whose extraction functions were deferred (see {@code extractionsToRewrite}),
 * applies those extraction functions to the row's dimension values at this level.
 */
@Override
public Function<Row, Row> makePostComputeManipulatorFn(final GroupByQuery query, final MetricManipulationFn fn) {
    // Output names of dimensions whose extraction fns should run here rather than lower down.
    final Set<String> optimizedDims = ImmutableSet.copyOf(Iterables.transform(extractionsToRewrite(query), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec input) {
            return input.getOutputName();
        }
    }));
    final Function<Row, Row> preCompute = makePreComputeManipulatorFn(query, fn);
    // No deferred extractions: the pre-compute manipulation is all that is needed.
    if (optimizedDims.isEmpty()) {
        return preCompute;
    }
    // If we have optimizations that can be done at this level, we apply them here
    final Map<String, ExtractionFn> extractionFnMap = new HashMap<>();
    for (DimensionSpec dimensionSpec : query.getDimensions()) {
        final String outputName = dimensionSpec.getOutputName();
        if (optimizedDims.contains(outputName)) {
            extractionFnMap.put(outputName, dimensionSpec.getExtractionFn());
        }
    }
    return new Function<Row, Row>() {

        @Nullable
        @Override
        public Row apply(Row input) {
            final Row preComputed = preCompute.apply(input);
            // Only MapBasedRow values can be rewritten; others pass through unchanged.
            if (!(preComputed instanceof MapBasedRow)) {
                return preComputed;
            }
            final MapBasedRow mapRow = (MapBasedRow) preComputed;
            final Map<String, Object> event = Maps.newHashMap(mapRow.getEvent());
            // Replace each optimized dimension's value with its extracted form.
            for (String dim : optimizedDims) {
                event.put(dim, extractionFnMap.get(dim).apply(event.get(dim)));
            }
            return new MapBasedRow(mapRow.getTimestamp(), event);
        }
    };
}
Also used : MapBasedRow(io.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) ExtractionFn(io.druid.query.extraction.ExtractionFn) HashMap(java.util.HashMap) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow)

Example 10 with MapBasedRow

use of io.druid.data.input.MapBasedRow in project druid by druid-io.

In the class GroupByStrategyV2, the method mergeResults:

/**
 * Merges per-segment groupBy result streams for strategy v2. An inner query (stripped of
 * post-aggregators and the "having" clause) is merged via {@link GroupByBinaryFnV2}; on the
 * outermost pass each merged row then gets its post-aggregators computed and its timestamp
 * replaced by the universal "fudge" timestamp (when one exists), before the limit is applied.
 */
@Override
public Sequence<Row> mergeResults(final QueryRunner<Row> baseRunner, final GroupByQuery query, final Map<String, Object> responseContext) {
    // Merge streams using ResultMergeQueryRunner, then apply postaggregators, then apply limit (which may
    // involve materialization)
    final ResultMergeQueryRunner<Row> mergingQueryRunner = new ResultMergeQueryRunner<Row>(baseRunner) {

        @Override
        protected Ordering<Row> makeOrdering(Query<Row> queryParam) {
            return ((GroupByQuery) queryParam).getRowOrdering(true);
        }

        @Override
        protected BinaryFn<Row, Row, Row> createMergeFn(Query<Row> queryParam) {
            return new GroupByBinaryFnV2((GroupByQuery) queryParam);
        }
    };
    // Fudge timestamp, maybe.
    final DateTime fudgeTimestamp = getUniversalTimestamp(query);
    // Inner query: same data source/filter/dims/aggs, but no post-aggs or "having" (applied below).
    // Context overrides pin strategy v2, disable finalization, pass the fudge timestamp down as a
    // string, and mark the inner query as non-outermost so nested passes skip the mapping below.
    return query.applyLimit(Sequences.map(mergingQueryRunner.run(new GroupByQuery(query.getDataSource(), query.getQuerySegmentSpec(), query.getVirtualColumns(), query.getDimFilter(), query.getGranularity(), query.getDimensions(), query.getAggregatorSpecs(), // Don't do post aggs until the end of this method.
    ImmutableList.<PostAggregator>of(), // Don't do "having" clause until the end of this method.
    null, null, query.getContext()).withOverriddenContext(ImmutableMap.<String, Object>of("finalize", false, GroupByQueryConfig.CTX_KEY_STRATEGY, GroupByStrategySelector.STRATEGY_V2, CTX_KEY_FUDGE_TIMESTAMP, fudgeTimestamp == null ? "" : String.valueOf(fudgeTimestamp.getMillis()), CTX_KEY_OUTERMOST, false)), responseContext), new Function<Row, Row>() {

        @Override
        public Row apply(final Row row) {
            // Only the outermost query performs post-agg computation and timestamp fudging.
            if (!query.getContextBoolean(CTX_KEY_OUTERMOST, true)) {
                return row;
            }
            // Fast path: nothing to compute and no timestamp rewrite needed.
            if (query.getPostAggregatorSpecs().isEmpty() && fudgeTimestamp == null) {
                return row;
            }
            final Map<String, Object> newMap;
            if (query.getPostAggregatorSpecs().isEmpty()) {
                // Timestamp-only rewrite; reuse the row's existing event map.
                newMap = ((MapBasedRow) row).getEvent();
            } else {
                // Copy the event, then compute post-aggs in order; each post-agg can observe
                // the values written by earlier ones.
                newMap = Maps.newLinkedHashMap(((MapBasedRow) row).getEvent());
                for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) {
                    newMap.put(postAggregator.getName(), postAggregator.compute(newMap));
                }
            }
            return new MapBasedRow(fudgeTimestamp != null ? fudgeTimestamp : row.getTimestamp(), newMap);
        }
    };
}
Also used : ResultMergeQueryRunner(io.druid.query.ResultMergeQueryRunner) GroupByBinaryFnV2(io.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) Query(io.druid.query.Query) GroupByQuery(io.druid.query.groupby.GroupByQuery) PostAggregator(io.druid.query.aggregation.PostAggregator) DateTime(org.joda.time.DateTime) MapBasedRow(io.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) GroupByQuery(io.druid.query.groupby.GroupByQuery) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow)

Aggregations

MapBasedRow (io.druid.data.input.MapBasedRow)34 Test (org.junit.Test)21 Row (io.druid.data.input.Row)16 GroupByQueryRunnerTest (io.druid.query.groupby.GroupByQueryRunnerTest)12 DateTime (org.joda.time.DateTime)11 File (java.io.File)9 Sequence (io.druid.java.util.common.guava.Sequence)7 Function (com.google.common.base.Function)6 MapBasedInputRow (io.druid.data.input.MapBasedInputRow)4 AggregationTestHelper (io.druid.query.aggregation.AggregationTestHelper)4 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)4 DimensionSpec (io.druid.query.dimension.DimensionSpec)4 List (java.util.List)4 Map (java.util.Map)4 Interval (org.joda.time.Interval)4 AggregatorsModule (io.druid.jackson.AggregatorsModule)3 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)3 SelectorDimFilter (io.druid.query.filter.SelectorDimFilter)3 GroupByQueryEngine (io.druid.query.groupby.GroupByQueryEngine)3 ISE (io.druid.java.util.common.ISE)2