Example 86 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io: class IndexMergerV9, method makeIndexFiles.

@Override
protected File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn,
    final IndexSpec indexSpec
) throws IOException {
    progress.start();
    progress.progress();
    List<Metadata> metadataList = Lists.transform(adapters, new Function<IndexableAdapter, Metadata>() {

        @Override
        public Metadata apply(IndexableAdapter input) {
            return input.getMetadata();
        }
    });
    Metadata segmentMetadata = null;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    Closer closer = Closer.create();
    try {
        final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
        final File v9TmpDir = new File(outDir, "v9-tmp");
        FileUtils.forceMkdir(v9TmpDir);
        registerDeleteDirectory(closer, v9TmpDir);
        log.info("Start making v9 index files, outDir:%s", outDir);
        File tmpPeonFilesDir = new File(v9TmpDir, "tmpPeonFiles");
        FileUtils.forceMkdir(tmpPeonFilesDir);
        registerDeleteDirectory(closer, tmpPeonFilesDir);
        final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, false);
        closer.register(ioPeon);
        long startTime = System.currentTimeMillis();
        ByteStreams.write(Ints.toByteArray(IndexIO.V9_VERSION), Files.newOutputStreamSupplier(new File(outDir, "version.bin")));
        log.info("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
        progress.progress();
        startTime = System.currentTimeMillis();
        try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
            mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
        }
        log.info("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
        progress.progress();
        final Map<String, ValueType> metricsValueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
        final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
        final List<ColumnCapabilitiesImpl> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
        mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
        final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMerger> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            mergers.add(handlers[i].makeMerger(indexSpec, v9TmpDir, ioPeon, dimCapabilities.get(i), progress));
        }
        /************* Setup Dim Conversions **************/
        progress.progress();
        startTime = System.currentTimeMillis();
        final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
        final ArrayList<Boolean> dimensionSkipFlag = Lists.newArrayListWithCapacity(mergedDimensions.size());
        final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
        writeDimValueAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
        log.info("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
        /************* Walk through data sets, merge them, and write merged columns *************/
        progress.progress();
        final Iterable<Rowboat> theRows = makeRowIterable(adapters, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
        final LongColumnSerializer timeWriter = setupTimeWriter(ioPeon, indexSpec);
        final ArrayList<GenericColumnSerializer> metWriters = setupMetricsWriters(ioPeon, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
        final List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(adapters.size());
        mergeIndexesAndWriteColumns(adapters, progress, theRows, timeWriter, metWriters, rowNumConversions, mergers);
        /************ Create Inverted Indexes and Finalize Build Columns *************/
        final String section = "build inverted index and columns";
        progress.startSection(section);
        makeTimeColumn(v9Smoosher, progress, timeWriter);
        makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metWriters);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerV9 merger = (DimensionMergerV9) mergers.get(i);
            merger.writeIndexes(rowNumConversions, closer);
            if (merger.canSkip()) {
                continue;
            }
            ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
            makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
        }
        progress.stopSection(section);
        /************* Make index.drd & metadata.drd files **************/
        progress.progress();
        makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
        makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
        v9Smoosher.close();
        progress.stop();
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : ArrayList(java.util.ArrayList) IOPeon(io.druid.segment.data.IOPeon) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) FileSmoosher(io.druid.java.util.common.io.smoosh.FileSmoosher) Closer(com.google.common.io.Closer) ValueType(io.druid.segment.column.ValueType) MMappedQueryableSegmentizerFactory(io.druid.segment.loading.MMappedQueryableSegmentizerFactory) ColumnDescriptor(io.druid.segment.column.ColumnDescriptor) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File) Map(java.util.Map) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl)
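
The combining-factory branch above is the key detail: when segments are merged, their rows are already aggregated, so each metric must be combined from partial values rather than recomputed from raw input. A minimal sketch of that distinction, assuming only io.druid's LongSumAggregatorFactory (the column names here are invented):

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;

public class CombiningFactoryDemo {
    public static void main(String[] args) {
        // At ingest time the factory reads the raw input column.
        AggregatorFactory atIngest = new LongSumAggregatorFactory("revenue", "rawRevenue");
        // When merging segments, the combining factory instead reads the
        // already-summed "revenue" column, as makeIndexFiles arranges above.
        AggregatorFactory combining = atIngest.getCombiningFactory();
        System.out.println(combining.getName()); // revenue
        // combine() merges two partial sums coming from different segments.
        System.out.println(atIngest.combine(3L, 4L)); // 7
    }
}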

Example 87 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io: class IndexMerger, method merge.

public File merge(
    List<IndexableAdapter> indexes,
    final boolean rollup,
    final AggregatorFactory[] metricAggs,
    File outDir,
    IndexSpec indexSpec,
    ProgressIndicator progress
) throws IOException {
    FileUtils.deleteDirectory(outDir);
    FileUtils.forceMkdir(outDir);
    final List<String> mergedDimensions = getMergedDimensions(indexes);
    final List<String> mergedMetrics = Lists.transform(mergeIndexed(Lists.newArrayList(FunctionalIterable.create(indexes).transform(new Function<IndexableAdapter, Iterable<String>>() {

        @Override
        public Iterable<String> apply(@Nullable IndexableAdapter input) {
            return input.getMetricNames();
        }
    }))), new Function<String, String>() {

        @Override
        public String apply(@Nullable String input) {
            return input;
        }
    });
    final AggregatorFactory[] sortedMetricAggs = new AggregatorFactory[mergedMetrics.size()];
    for (int i = 0; i < metricAggs.length; i++) {
        AggregatorFactory metricAgg = metricAggs[i];
        int metricIndex = mergedMetrics.indexOf(metricAgg.getName());
        /*
          If metricIndex is negative, one of the metricAggs was not present in the union of metrics from the indices
          we are merging
         */
        if (metricIndex > -1) {
            sortedMetricAggs[metricIndex] = metricAgg;
        }
    }
    /*
      If there is nothing at sortedMetricAggs[i], then we did not have a metricAgg whose name matched the name
      of the ith element of mergedMetrics. I.e. There was a metric in the indices to merge that we did not ask for.
     */
    for (int i = 0; i < sortedMetricAggs.length; i++) {
        if (sortedMetricAggs[i] == null) {
            throw new IAE("Indices to merge contained metric[%s], but requested metrics did not", mergedMetrics.get(i));
        }
    }
    for (int i = 0; i < mergedMetrics.size(); i++) {
        if (!sortedMetricAggs[i].getName().equals(mergedMetrics.get(i))) {
            throw new IAE("Metric mismatch, index[%d] [%s] != [%s]", i, sortedMetricAggs[i].getName(), mergedMetrics.get(i));
        }
    }
    Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn = new Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>>() {

        @Override
        public Iterable<Rowboat> apply(@Nullable ArrayList<Iterable<Rowboat>> boats) {
            if (rollup) {
                return CombiningIterable.create(
                    new MergeIterable<Rowboat>(Ordering.<Rowboat>natural().nullsFirst(), boats),
                    Ordering.<Rowboat>natural().nullsFirst(),
                    new RowboatMergeFunction(sortedMetricAggs)
                );
            } else {
                return new MergeIterable<Rowboat>(new Ordering<Rowboat>() {

                    @Override
                    public int compare(Rowboat left, Rowboat right) {
                        return Longs.compare(left.getTimestamp(), right.getTimestamp());
                    }
                }.nullsFirst(), boats);
            }
        }
    };
    return makeIndexFiles(indexes, sortedMetricAggs, outDir, progress, mergedDimensions, mergedMetrics, rowMergerFn, indexSpec);
}
Also used : MergeIterable(io.druid.java.util.common.guava.MergeIterable) FunctionalIterable(io.druid.java.util.common.guava.FunctionalIterable) IndexedIterable(io.druid.segment.data.IndexedIterable) CombiningIterable(io.druid.collections.CombiningIterable) ArrayList(java.util.ArrayList) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) IAE(io.druid.java.util.common.IAE) Function(com.google.common.base.Function) Ordering(com.google.common.collect.Ordering) Nullable(javax.annotation.Nullable)
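
The reordering-and-validation step above is self-contained enough to exercise in isolation. A hedged sketch with invented class and metric names, and IllegalArgumentException standing in for Druid's IAE:

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import java.util.Arrays;
import java.util.List;

public class SortedMetricAggsDemo {
    // Align the requested aggregators to the merged metric-name order,
    // failing fast when the indices contain a metric that was not requested.
    static AggregatorFactory[] sortToMatch(List<String> mergedMetrics, AggregatorFactory[] metricAggs) {
        AggregatorFactory[] sorted = new AggregatorFactory[mergedMetrics.size()];
        for (AggregatorFactory agg : metricAggs) {
            int idx = mergedMetrics.indexOf(agg.getName());
            if (idx > -1) {
                sorted[idx] = agg; // silently drop aggs absent from the union of metrics
            }
        }
        for (int i = 0; i < sorted.length; i++) {
            if (sorted[i] == null) {
                throw new IllegalArgumentException("Indices contained metric[" + mergedMetrics.get(i) + "] that was not requested");
            }
        }
        return sorted;
    }

    public static void main(String[] args) {
        AggregatorFactory[] sorted = sortToMatch(
            Arrays.asList("count", "added"),
            new AggregatorFactory[]{
                new LongSumAggregatorFactory("added", "added"),
                new CountAggregatorFactory("count")
            }
        );
        System.out.println(sorted[0].getName() + ", " + sorted[1].getName()); // count, added
    }
}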

Example 88 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io: class TimeseriesQueryQueryToolChest, method makeComputeManipulatorFn.

private Function<Result<TimeseriesResultValue>, Result<TimeseriesResultValue>> makeComputeManipulatorFn(final TimeseriesQuery query, final MetricManipulationFn fn, final boolean calculatePostAggs) {
    return new Function<Result<TimeseriesResultValue>, Result<TimeseriesResultValue>>() {

        @Override
        public Result<TimeseriesResultValue> apply(Result<TimeseriesResultValue> result) {
            final TimeseriesResultValue holder = result.getValue();
            final Map<String, Object> values = Maps.newHashMap(holder.getBaseObject());
            if (calculatePostAggs) {
                // put non-finalized aggregator values in first, so dependent post-aggregators can be computed
                for (AggregatorFactory agg : query.getAggregatorSpecs()) {
                    values.put(agg.getName(), holder.getMetric(agg.getName()));
                }
                for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
                    values.put(postAgg.getName(), postAgg.compute(values));
                }
            }
            for (AggregatorFactory agg : query.getAggregatorSpecs()) {
                values.put(agg.getName(), fn.manipulate(agg, holder.getMetric(agg.getName())));
            }
            return new Result<TimeseriesResultValue>(result.getTimestamp(), new TimeseriesResultValue(values));
        }
    };
}
Also used : Function(com.google.common.base.Function) PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Result(io.druid.query.Result)
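
The MetricManipulationFn argument above is a per-value hook; the common "finalizing" behavior delegates to AggregatorFactory.finalizeComputation, which turns intermediate state (for example an HLL sketch) into a user-facing number. A sketch of such a function, assuming only the MetricManipulationFn interface from io.druid.query.aggregation, not the exact class Druid ships:

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.MetricManipulationFn;

public class FinalizingFnSketch {
    // Equivalent in spirit to the finalizing manipulator Druid applies when
    // results leave the query stack; shown here only to illustrate the hook.
    static final MetricManipulationFn FINALIZE = new MetricManipulationFn() {
        @Override
        public Object manipulate(AggregatorFactory factory, Object object) {
            return factory.finalizeComputation(object);
        }
    };
}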

Example 89 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io: class TopNQueryQueryToolChest, method getCacheStrategy.

@Override
public CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> getCacheStrategy(final TopNQuery query) {
    return new CacheStrategy<Result<TopNResultValue>, Object, TopNQuery>() {

        private final List<AggregatorFactory> aggs = Lists.newArrayList(query.getAggregatorSpecs());

        private final List<PostAggregator> postAggs = AggregatorUtil.pruneDependentPostAgg(query.getPostAggregatorSpecs(), query.getTopNMetricSpec().getMetricName(query.getDimensionSpec()));

        @Override
        public boolean isCacheable(TopNQuery query, boolean willMergeRunners) {
            return true;
        }

        @Override
        public byte[] computeCacheKey(TopNQuery query) {
            final CacheKeyBuilder builder = new CacheKeyBuilder(TOPN_QUERY)
                .appendCacheable(query.getDimensionSpec())
                .appendCacheable(query.getTopNMetricSpec())
                .appendInt(query.getThreshold())
                .appendCacheable(query.getGranularity())
                .appendCacheable(query.getDimensionsFilter())
                .appendCacheablesIgnoringOrder(query.getAggregatorSpecs())
                .appendCacheable(query.getVirtualColumns());
            final List<PostAggregator> postAggregators = prunePostAggregators(query);
            if (!postAggregators.isEmpty()) {
                // Append post aggregators only when they are used as sort keys.
                // Note that appending an empty list produces a different cache key from not appending it.
                builder.appendCacheablesIgnoringOrder(postAggregators);
            }
            return builder.build();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Result<TopNResultValue>, Object> prepareForCache() {
            return new Function<Result<TopNResultValue>, Object>() {

                private final String[] aggFactoryNames = extractFactoryName(query.getAggregatorSpecs());

                @Override
                public Object apply(final Result<TopNResultValue> input) {
                    List<DimensionAndMetricValueExtractor> results = Lists.newArrayList(input.getValue());
                    final List<Object> retVal = Lists.newArrayListWithCapacity(results.size() + 1);
                    // make sure to preserve timezone information when caching results
                    retVal.add(input.getTimestamp().getMillis());
                    for (DimensionAndMetricValueExtractor result : results) {
                        List<Object> vals = Lists.newArrayListWithCapacity(aggFactoryNames.length + 2);
                        vals.add(result.getDimensionValue(query.getDimensionSpec().getOutputName()));
                        for (String aggName : aggFactoryNames) {
                            vals.add(result.getMetric(aggName));
                        }
                        retVal.add(vals);
                    }
                    return retVal;
                }
            };
        }

        @Override
        public Function<Object, Result<TopNResultValue>> pullFromCache() {
            return new Function<Object, Result<TopNResultValue>>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Result<TopNResultValue> apply(Object input) {
                    List<Object> results = (List<Object>) input;
                    List<Map<String, Object>> retVal = Lists.newArrayListWithCapacity(results.size());
                    Iterator<Object> inputIter = results.iterator();
                    DateTime timestamp = granularity.toDateTime(((Number) inputIter.next()).longValue());
                    while (inputIter.hasNext()) {
                        List<Object> result = (List<Object>) inputIter.next();
                        Map<String, Object> vals = Maps.newLinkedHashMap();
                        Iterator<AggregatorFactory> aggIter = aggs.iterator();
                        Iterator<Object> resultIter = result.iterator();
                        vals.put(query.getDimensionSpec().getOutputName(), resultIter.next());
                        while (aggIter.hasNext() && resultIter.hasNext()) {
                            final AggregatorFactory factory = aggIter.next();
                            vals.put(factory.getName(), factory.deserialize(resultIter.next()));
                        }
                        for (PostAggregator postAgg : postAggs) {
                            vals.put(postAgg.getName(), postAgg.compute(vals));
                        }
                        retVal.add(vals);
                    }
                    return new Result<>(timestamp, new TopNResultValue(retVal));
                }
            };
        }
    };
}
Also used : PostAggregator(io.druid.query.aggregation.PostAggregator) CacheKeyBuilder(io.druid.query.cache.CacheKeyBuilder) Granularity(io.druid.java.util.common.granularity.Granularity) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) Function(com.google.common.base.Function) List(java.util.List) Map(java.util.Map) CacheStrategy(io.druid.query.CacheStrategy)
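
The cache payload produced by prepareForCache above is deliberately flat: the timestamp in millis comes first, then one [dimensionValue, metric...] list per row, which pullFromCache re-pairs with the query's aggregators via deserialize. A small sketch of that shape with invented values (LongSumAggregatorFactory.deserialize passes longs through unchanged):

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import java.util.Arrays;
import java.util.List;

public class TopNCacheShapeDemo {
    public static void main(String[] args) {
        AggregatorFactory sum = new LongSumAggregatorFactory("clicks", "clicks");
        // What prepareForCache would emit for a single-row result:
        List<Object> cached = Arrays.asList(
            1514764800000L,                  // result timestamp in millis
            Arrays.asList("page_A", 42L)     // [dimension value, metrics...]
        );
        @SuppressWarnings("unchecked")
        List<Object> row = (List<Object>) cached.get(1);
        // pullFromCache walks the aggregators in order and re-attaches names:
        System.out.println(sum.getName() + " = " + sum.deserialize(row.get(1))); // clicks = 42
    }
}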

Example 90 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io: class TopNBinaryFn, method apply.

@Override
public Result<TopNResultValue> apply(Result<TopNResultValue> arg1, Result<TopNResultValue> arg2) {
    if (arg1 == null) {
        return merger.getResult(arg2, comparator);
    }
    if (arg2 == null) {
        return merger.getResult(arg1, comparator);
    }
    Map<Object, DimensionAndMetricValueExtractor> retVals = new LinkedHashMap<>();
    TopNResultValue arg1Vals = arg1.getValue();
    TopNResultValue arg2Vals = arg2.getValue();
    for (DimensionAndMetricValueExtractor arg1Val : arg1Vals) {
        retVals.put(arg1Val.getDimensionValue(dimension), arg1Val);
    }
    for (DimensionAndMetricValueExtractor arg2Val : arg2Vals) {
        final Object dimensionValue = arg2Val.getDimensionValue(dimension);
        DimensionAndMetricValueExtractor arg1Val = retVals.get(dimensionValue);
        if (arg1Val != null) {
            // map size = aggregators + topN dimension + post-aggs (if sorting is done on a post-agg field)
            Map<String, Object> retVal = new LinkedHashMap<>(aggregations.size() + 2);
            retVal.put(dimension, dimensionValue);
            for (AggregatorFactory factory : aggregations) {
                final String metricName = factory.getName();
                retVal.put(metricName, factory.combine(arg1Val.getMetric(metricName), arg2Val.getMetric(metricName)));
            }
            for (PostAggregator pf : postAggregations) {
                retVal.put(pf.getName(), pf.compute(retVal));
            }
            retVals.put(dimensionValue, new DimensionAndMetricValueExtractor(retVal));
        } else {
            retVals.put(dimensionValue, arg2Val);
        }
    }
    final DateTime timestamp;
    if (gran instanceof AllGranularity) {
        timestamp = arg1.getTimestamp();
    } else {
        timestamp = gran.bucketStart(arg1.getTimestamp());
    }
    TopNResultBuilder bob = topNMetricSpec.getResultBuilder(timestamp, dimSpec, threshold, comparator, aggregations, postAggregations);
    for (DimensionAndMetricValueExtractor extractor : retVals.values()) {
        bob.addEntry(extractor);
    }
    return bob.build();
}
Also used : PostAggregator(io.druid.query.aggregation.PostAggregator) AllGranularity(io.druid.java.util.common.granularity.AllGranularity) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime) LinkedHashMap(java.util.LinkedHashMap)
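
The inner loop above boils down to AggregatorFactory.combine applied per metric whenever the same dimension value appears in both partial results. A minimal, self-contained illustration with an invented metric:

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import java.util.LinkedHashMap;
import java.util.Map;

public class TopNCombineDemo {
    public static void main(String[] args) {
        AggregatorFactory sum = new LongSumAggregatorFactory("clicks", "clicks");
        Map<String, Object> lhs = new LinkedHashMap<>();
        lhs.put("clicks", 10L);
        Map<String, Object> rhs = new LinkedHashMap<>();
        rhs.put("clicks", 5L);
        // TopNBinaryFn does this for every metric of a matching dimension value.
        Object merged = sum.combine(lhs.get("clicks"), rhs.get("clicks"));
        System.out.println(merged); // 15
    }
}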

Aggregations

AggregatorFactory (io.druid.query.aggregation.AggregatorFactory) 148
Test (org.junit.Test) 86
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory) 82
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory) 64
Interval (org.joda.time.Interval) 45
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory) 38
DateTime (org.joda.time.DateTime) 37
FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory) 32
Result (io.druid.query.Result) 31
DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory) 27
HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) 25
Row (io.druid.data.input.Row) 24
PostAggregator (io.druid.query.aggregation.PostAggregator) 24
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec) 22
CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) 19
LongMaxAggregatorFactory (io.druid.query.aggregation.LongMaxAggregatorFactory) 18
LongFirstAggregatorFactory (io.druid.query.aggregation.first.LongFirstAggregatorFactory) 18
LongLastAggregatorFactory (io.druid.query.aggregation.last.LongLastAggregatorFactory) 18
DimensionSpec (io.druid.query.dimension.DimensionSpec) 18
TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery) 17