Example 26 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From class IndexMerger, the method makeIndexFiles:

protected File makeIndexFiles(final List<IndexableAdapter> indexes, final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn, final IndexSpec indexSpec) throws IOException {
    List<Metadata> metadataList = Lists.transform(indexes, new Function<IndexableAdapter, Metadata>() {

        @Nullable
        @Override
        public Metadata apply(IndexableAdapter input) {
            return input.getMetadata();
        }
    });
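    // Merge per-segment metadata; aggregators are first converted to their combining
    // form because merged segments contain already-aggregated values.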
    Metadata segmentMetadata = null;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    final Map<String, ValueType> valueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
    final Map<String, ColumnCapabilitiesImpl> columnCapabilities = Maps.newHashMap();
    final List<ColumnCapabilitiesImpl> dimCapabilities = new ArrayList<>();
    for (IndexableAdapter adapter : indexes) {
        for (String dimension : adapter.getDimensionNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(dimension);
            ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(dimension, mergedCapabilities.merge(capabilities));
        }
        for (String metric : adapter.getMetricNames()) {
            ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(metric);
            ColumnCapabilities capabilities = adapter.getCapabilities(metric);
            if (mergedCapabilities == null) {
                mergedCapabilities = new ColumnCapabilitiesImpl();
            }
            columnCapabilities.put(metric, mergedCapabilities.merge(capabilities));
            valueTypes.put(metric, capabilities.getType());
            metricTypeNames.put(metric, adapter.getMetricType(metric));
        }
    }
    for (String dimension : mergedDimensions) {
        dimCapabilities.add(columnCapabilities.get(dimension));
    }
    Closer closer = Closer.create();
    try {
        final Interval dataInterval;
        final File v8OutDir = new File(outDir, "v8-tmp");
        FileUtils.forceMkdir(v8OutDir);
        registerDeleteDirectory(closer, v8OutDir);
        File tmpPeonFilesDir = new File(v8OutDir, "tmpPeonFiles");
        FileUtils.forceMkdir(tmpPeonFilesDir);
        registerDeleteDirectory(closer, tmpPeonFilesDir);
        final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, true);
        closer.register(ioPeon);
        /*************  Main index.drd file **************/
        progress.progress();
        long startTime = System.currentTimeMillis();
        File indexFile = new File(v8OutDir, "index.drd");
        try (FileOutputStream fileOutputStream = new FileOutputStream(indexFile);
            FileChannel channel = fileOutputStream.getChannel()) {
            channel.write(ByteBuffer.wrap(new byte[] { IndexIO.V8_VERSION }));
            GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
            DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
            DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);
            for (IndexableAdapter index : indexes) {
                minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
                maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
            }
            dataInterval = new Interval(minTime, maxTime);
            serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
            serializerUtils.writeString(channel, mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory()));
        }
        IndexIO.checkFileSize(indexFile);
        log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Setup Dim Conversions **************/
        progress.progress();
        startTime = System.currentTimeMillis();
        final ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
        final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMerger> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy merger = handlers[i].makeLegacyMerger(indexSpec, v8OutDir, ioPeon, dimCapabilities.get(i), progress);
            mergers.add(merger);
            merger.writeMergedValueMetadata(indexes);
            FileOutputSupplier dimOut = new FileOutputSupplier(merger.makeDimFile(), true);
            merger.writeValueMetadataToFile(dimOut);
            dimOuts.add(dimOut);
        }
        log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        /************* Walk through data sets and merge them *************/
        progress.progress();
        startTime = System.currentTimeMillis();
        Iterable<Rowboat> theRows = makeRowIterable(indexes, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
        LongSupplierSerializer timeWriter = CompressionFactory.getLongSerializer(ioPeon, "little_end_time", IndexIO.BYTE_ORDER, indexSpec.getLongEncoding(), CompressedObjectStrategy.DEFAULT_COMPRESSION_STRATEGY);
        timeWriter.open();
        ArrayList<MetricColumnSerializer> metWriters = Lists.newArrayListWithCapacity(mergedMetrics.size());
        final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression();
        final CompressionFactory.LongEncodingStrategy longEncoding = indexSpec.getLongEncoding();
        for (String metric : mergedMetrics) {
            ValueType type = valueTypes.get(metric);
            switch(type) {
                case LONG:
                    metWriters.add(new LongMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression, longEncoding));
                    break;
                case FLOAT:
                    metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression));
                    break;
                case COMPLEX:
                    final String typeName = metricTypeNames.get(metric);
                    ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
                    if (serde == null) {
                        throw new ISE("Unknown type[%s]", typeName);
                    }
                    metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde));
                    break;
                default:
                    throw new ISE("Unknown type[%s]", type);
            }
        }
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.open();
        }
        int rowCount = 0;
        long time = System.currentTimeMillis();
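        // One conversion buffer per input index: position = source row number, value = row
        // number in the merged ordering (INVALID_ROW for rows that do not survive the merge).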
        List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(indexes.size());
        for (IndexableAdapter index : indexes) {
            int[] arr = new int[index.getNumRows()];
            Arrays.fill(arr, INVALID_ROW);
            rowNumConversions.add(IntBuffer.wrap(arr));
        }
        for (Rowboat theRow : theRows) {
            progress.progress();
            timeWriter.add(theRow.getTimestamp());
            final Object[] metrics = theRow.getMetrics();
            for (int i = 0; i < metrics.length; ++i) {
                metWriters.get(i).serialize(metrics[i]);
            }
            Object[] dims = theRow.getDims();
            for (int i = 0; i < dims.length; ++i) {
                mergers.get(i).processMergedRow(dims[i]);
            }
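            // Record, for every source row folded into this merged row, its new row number,
            // so the inverted indexes can later be rewritten against the merged ordering.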
            for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
                final IntBuffer conversionBuffer = rowNumConversions.get(comprisedRow.getKey());
                for (Integer rowNum : comprisedRow.getValue()) {
                    while (conversionBuffer.position() < rowNum) {
                        conversionBuffer.put(INVALID_ROW);
                    }
                    conversionBuffer.put(rowCount);
                }
            }
            if ((++rowCount % 500000) == 0) {
                log.info("outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time);
                time = System.currentTimeMillis();
            }
        }
        for (IntBuffer rowNumConversion : rowNumConversions) {
            rowNumConversion.rewind();
        }
        final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER);
        timeFile.delete();
        ByteSink out = Files.asByteSink(timeFile, FileWriteMode.APPEND);
        timeWriter.closeAndConsolidate(out);
        IndexIO.checkFileSize(timeFile);
        for (MetricColumnSerializer metWriter : metWriters) {
            metWriter.close();
        }
        log.info("outDir[%s] completed walk through of %,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - startTime);
        /************ Create Inverted Indexes and Finalize Columns *************/
        startTime = System.currentTimeMillis();
        final File invertedFile = new File(v8OutDir, "inverted.drd");
        Files.touch(invertedFile);
        out = Files.asByteSink(invertedFile, FileWriteMode.APPEND);
        final File geoFile = new File(v8OutDir, "spatial.drd");
        Files.touch(geoFile);
        OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerLegacy legacyMerger = (DimensionMergerLegacy) mergers.get(i);
            legacyMerger.writeIndexes(rowNumConversions, closer);
            legacyMerger.writeIndexesToFiles(out, spatialOut);
            legacyMerger.writeRowValuesToFile(dimOuts.get(i));
        }
        log.info("outDir[%s] completed inverted.drd and wrote dimensions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
        final Function<String, String> dimFilenameFunction = new Function<String, String>() {

            @Override
            public String apply(@Nullable String input) {
                String formatString;
                if (columnCapabilities.get(input).isDictionaryEncoded()) {
                    formatString = "dim_%s.drd";
                } else {
                    formatString = String.format("numeric_dim_%%s_%s.drd", IndexIO.BYTE_ORDER);
                }
                return GuavaUtils.formatFunction(formatString).apply(input);
            }
        };
        final ArrayList<String> expectedFiles = Lists.newArrayList(Iterables.concat(Arrays.asList("index.drd", "inverted.drd", "spatial.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER)), Iterables.transform(mergedDimensions, dimFilenameFunction), Iterables.transform(mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER)))));
        if (segmentMetadata != null) {
            writeMetadataToFile(new File(v8OutDir, "metadata.drd"), segmentMetadata);
            log.info("wrote metadata.drd in outDir[%s].", v8OutDir);
            expectedFiles.add("metadata.drd");
        }
        Map<String, File> files = Maps.newLinkedHashMap();
        for (String fileName : expectedFiles) {
            files.put(fileName, new File(v8OutDir, fileName));
        }
        File smooshDir = new File(v8OutDir, "smoosher");
        FileUtils.forceMkdir(smooshDir);
        for (Map.Entry<String, File> entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) {
            entry.getValue().delete();
        }
        for (File file : smooshDir.listFiles()) {
            Files.move(file, new File(v8OutDir, file.getName()));
        }
        if (!smooshDir.delete()) {
            log.info("Unable to delete temporary dir[%s], contains[%s]", smooshDir, Arrays.asList(smooshDir.listFiles()));
            throw new IOException(String.format("Unable to delete temporary dir[%s]", smooshDir));
        }
        createIndexDrdFile(IndexIO.V8_VERSION, v8OutDir, GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY), GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY), dataInterval, indexSpec.getBitmapSerdeFactory());
        indexIO.getDefaultIndexIOHandler().convertV8toV9(v8OutDir, outDir, indexSpec);
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : ArrayList(java.util.ArrayList) CompressedObjectStrategy(io.druid.segment.data.CompressedObjectStrategy) DateTime(org.joda.time.DateTime) ComplexMetricColumnSerializer(io.druid.segment.serde.ComplexMetricColumnSerializer) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) TreeSet(java.util.TreeSet) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File) Map(java.util.Map) Nullable(javax.annotation.Nullable) ColumnCapabilitiesImpl(io.druid.segment.column.ColumnCapabilitiesImpl) Interval(org.joda.time.Interval) ComplexMetricSerde(io.druid.segment.serde.ComplexMetricSerde) IOPeon(io.druid.segment.data.IOPeon) ColumnCapabilities(io.druid.segment.column.ColumnCapabilities) Function(com.google.common.base.Function) ByteSink(com.google.common.io.ByteSink) ISE(io.druid.java.util.common.ISE) Closer(com.google.common.io.Closer) ValueType(io.druid.segment.column.ValueType) FileChannel(java.nio.channels.FileChannel) IOException(java.io.IOException) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) CompressionFactory(io.druid.segment.data.CompressionFactory) FileOutputSupplier(io.druid.common.guava.FileOutputSupplier) LongSupplierSerializer(io.druid.segment.data.LongSupplierSerializer)
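
The getCombiningFactory() conversion near the top of the method is what makes merging correct: the segments being merged already hold aggregated values, so each factory must be swapped for one that combines partial aggregates instead of re-aggregating raw input. A minimal illustration with a concrete factory (the variables here are invented for the example, not taken from the source):

AggregatorFactory count = new CountAggregatorFactory("rows");
// At merge time a count over raw rows becomes a sum over per-segment counts;
// for CountAggregatorFactory the combining factory is a long sum on the same column.
AggregatorFactory combining = count.getCombiningFactory();
Object merged = combining.combine(100L, 23L);   // 123L: partial counts are summed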

Example 27 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From class TopNQueryQueryToolChest, the method makePostComputeManipulatorFn:

@Override
public Function<Result<TopNResultValue>, Result<TopNResultValue>> makePostComputeManipulatorFn(final TopNQuery query, final MetricManipulationFn fn) {
    return new Function<Result<TopNResultValue>, Result<TopNResultValue>>() {

        private String dimension = query.getDimensionSpec().getOutputName();

        private final AggregatorFactory[] aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);

        private final String[] aggFactoryNames = extractFactoryName(query.getAggregatorSpecs());

        private final PostAggregator[] postAggregators = query.getPostAggregatorSpecs().toArray(new PostAggregator[0]);

        @Override
        public Result<TopNResultValue> apply(Result<TopNResultValue> result) {
            List<Map<String, Object>> serializedValues = Lists.newArrayList(Iterables.transform(result.getValue(), new Function<DimensionAndMetricValueExtractor, Map<String, Object>>() {

                @Override
                public Map<String, Object> apply(DimensionAndMetricValueExtractor input) {
                    final Map<String, Object> values = Maps.newHashMapWithExpectedSize(aggregatorFactories.length + query.getPostAggregatorSpecs().size() + 1);
                    for (int i = 0; i < aggFactoryNames.length; ++i) {
                        final String name = aggFactoryNames[i];
                        values.put(name, input.getMetric(name));
                    }
                    for (PostAggregator postAgg : postAggregators) {
                        Object calculatedPostAgg = input.getMetric(postAgg.getName());
                        if (calculatedPostAgg != null) {
                            values.put(postAgg.getName(), calculatedPostAgg);
                        } else {
                            values.put(postAgg.getName(), postAgg.compute(values));
                        }
                    }
                    for (int i = 0; i < aggFactoryNames.length; ++i) {
                        final String name = aggFactoryNames[i];
                        values.put(name, fn.manipulate(aggregatorFactories[i], input.getMetric(name)));
                    }
                    values.put(dimension, input.getDimensionValue(dimension));
                    return values;
                }
            }));
            return new Result<>(result.getTimestamp(), new TopNResultValue(serializedValues));
        }
    };
}
Also used : Function(com.google.common.base.Function) PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Map(java.util.Map) Result(io.druid.query.Result)
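
A hedged usage sketch for the manipulator above; toolChest, query, and rawResult are assumed to be in scope, and MetricManipulatorFns.finalizing() is one of Druid's standard manipulation functions:

// Finalize each aggregate (e.g. turn a HyperUniques sketch into a cardinality estimate).
Function<Result<TopNResultValue>, Result<TopNResultValue>> postFn =
    toolChest.makePostComputeManipulatorFn(query, MetricManipulatorFns.finalizing());
Result<TopNResultValue> finalized = postFn.apply(rawResult);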

Example 28 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From class TimeseriesBinaryFn, the method apply:

@Override
public Result<TimeseriesResultValue> apply(Result<TimeseriesResultValue> arg1, Result<TimeseriesResultValue> arg2) {
    if (arg1 == null) {
        return arg2;
    }
    if (arg2 == null) {
        return arg1;
    }
    TimeseriesResultValue arg1Val = arg1.getValue();
    TimeseriesResultValue arg2Val = arg2.getValue();
    Map<String, Object> retVal = new LinkedHashMap<String, Object>();
    for (AggregatorFactory factory : aggregations) {
        final String metricName = factory.getName();
        retVal.put(metricName, factory.combine(arg1Val.getMetric(metricName), arg2Val.getMetric(metricName)));
    }
    return (gran instanceof AllGranularity) ? new Result<TimeseriesResultValue>(arg1.getTimestamp(), new TimeseriesResultValue(retVal)) : new Result<TimeseriesResultValue>(gran.bucketStart(arg1.getTimestamp()), new TimeseriesResultValue(retVal));
}
Also used : AllGranularity(io.druid.java.util.common.granularity.AllGranularity) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LinkedHashMap(java.util.LinkedHashMap)
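
The heart of this binary function is AggregatorFactory.combine(), which merges two partial aggregates of the same metric. An illustrative call with a concrete factory (the values are invented for the example):

AggregatorFactory factory = new LongSumAggregatorFactory("rows", "rows");
// Combining two partial results for the same time bucket: partial sums are added.
Object merged = factory.combine(3L, 4L);   // 7L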

Example 29 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From class TimeseriesQueryQueryToolChest, the method getCacheStrategy:

@Override
public CacheStrategy<Result<TimeseriesResultValue>, Object, TimeseriesQuery> getCacheStrategy(final TimeseriesQuery query) {
    return new CacheStrategy<Result<TimeseriesResultValue>, Object, TimeseriesQuery>() {

        private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();

        @Override
        public boolean isCacheable(TimeseriesQuery query, boolean willMergeRunners) {
            return true;
        }

        @Override
        public byte[] computeCacheKey(TimeseriesQuery query) {
            return new CacheKeyBuilder(TIMESERIES_QUERY).appendBoolean(query.isDescending()).appendBoolean(query.isSkipEmptyBuckets()).appendCacheable(query.getGranularity()).appendCacheable(query.getDimensionsFilter()).appendCacheablesIgnoringOrder(query.getAggregatorSpecs()).appendCacheable(query.getVirtualColumns()).build();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Result<TimeseriesResultValue>, Object> prepareForCache() {
            return new Function<Result<TimeseriesResultValue>, Object>() {

                @Override
                public Object apply(final Result<TimeseriesResultValue> input) {
                    TimeseriesResultValue results = input.getValue();
                    final List<Object> retVal = Lists.newArrayListWithCapacity(1 + aggs.size());
                    retVal.add(input.getTimestamp().getMillis());
                    for (AggregatorFactory agg : aggs) {
                        retVal.add(results.getMetric(agg.getName()));
                    }
                    return retVal;
                }
            };
        }

        @Override
        public Function<Object, Result<TimeseriesResultValue>> pullFromCache() {
            return new Function<Object, Result<TimeseriesResultValue>>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Result<TimeseriesResultValue> apply(@Nullable Object input) {
                    List<Object> results = (List<Object>) input;
                    Map<String, Object> retVal = Maps.newLinkedHashMap();
                    Iterator<AggregatorFactory> aggsIter = aggs.iterator();
                    Iterator<Object> resultIter = results.iterator();
                    DateTime timestamp = granularity.toDateTime(((Number) resultIter.next()).longValue());
                    while (aggsIter.hasNext() && resultIter.hasNext()) {
                        final AggregatorFactory factory = aggsIter.next();
                        retVal.put(factory.getName(), factory.deserialize(resultIter.next()));
                    }
                    return new Result<TimeseriesResultValue>(timestamp, new TimeseriesResultValue(retVal));
                }
            };
        }
    };
}
Also used : CacheKeyBuilder(io.druid.query.cache.CacheKeyBuilder) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Granularity(io.druid.java.util.common.granularity.Granularity) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) Function(com.google.common.base.Function) List(java.util.List) CacheStrategy(io.druid.query.CacheStrategy) Nullable(javax.annotation.Nullable)
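
A minimal round trip through the strategy above, assuming toolChest, query, and result are in scope; the cached form is the compact list that prepareForCache() builds:

CacheStrategy<Result<TimeseriesResultValue>, Object, TimeseriesQuery> strategy = toolChest.getCacheStrategy(query);
// Serialize to the cache representation: [timestampMillis, agg1, agg2, ...].
Object cached = strategy.prepareForCache().apply(result);
// Rebuild the result, re-deserializing each metric via its AggregatorFactory.
Result<TimeseriesResultValue> restored = strategy.pullFromCache().apply(cached);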

Example 30 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From class BaseTopNAlgorithm, the method makeAggregators:

public static Aggregator[] makeAggregators(Cursor cursor, List<AggregatorFactory> aggregatorSpecs) {
    Aggregator[] aggregators = new Aggregator[aggregatorSpecs.size()];
    int aggregatorIndex = 0;
    for (AggregatorFactory spec : aggregatorSpecs) {
        aggregators[aggregatorIndex] = spec.factorize(cursor);
        ++aggregatorIndex;
    }
    return aggregators;
}
Also used : Aggregator(io.druid.query.aggregation.Aggregator) BufferAggregator(io.druid.query.aggregation.BufferAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
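
A sketch of how these cursor-bound aggregators are typically driven; cursor and aggregatorSpecs are assumed to be in scope, and the loop is illustrative rather than copied from the engine:

Aggregator[] aggregators = BaseTopNAlgorithm.makeAggregators(cursor, aggregatorSpecs);
while (!cursor.isDone()) {
    for (Aggregator aggregator : aggregators) {
        // Each aggregator reads its input column at the cursor's current row.
        aggregator.aggregate();
    }
    cursor.advance();
}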

Aggregations

AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 148 uses
Test (org.junit.Test): 86 uses
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 82 uses
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 64 uses
Interval (org.joda.time.Interval): 45 uses
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 38 uses
DateTime (org.joda.time.DateTime): 37 uses
FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory): 32 uses
Result (io.druid.query.Result): 31 uses
DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory): 27 uses
HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 25 uses
Row (io.druid.data.input.Row): 24 uses
PostAggregator (io.druid.query.aggregation.PostAggregator): 24 uses
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 22 uses
CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory): 19 uses
LongMaxAggregatorFactory (io.druid.query.aggregation.LongMaxAggregatorFactory): 18 uses
LongFirstAggregatorFactory (io.druid.query.aggregation.first.LongFirstAggregatorFactory): 18 uses
LongLastAggregatorFactory (io.druid.query.aggregation.last.LongLastAggregatorFactory): 18 uses
DimensionSpec (io.druid.query.dimension.DimensionSpec): 18 uses
TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery): 17 uses