
Example 36 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class AvgSqlAggregator method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    final VirtualColumnRegistry virtualColumnRegistry,
    final RexBuilder rexBuilder,
    final String name,
    final AggregateCall aggregateCall,
    final Project project,
    final List<Aggregation> existingAggregations,
    final boolean finalizeAggregations
) {
    final List<DruidExpression> arguments = Aggregations.getArgumentsForSimpleAggregator(plannerContext, rowSignature, aggregateCall, project);
    if (arguments == null) {
        return null;
    }
    final String countName = Calcites.makePrefixedName(name, "count");
    final AggregatorFactory count = CountSqlAggregator.createCountAggregatorFactory(countName, plannerContext, rowSignature, virtualColumnRegistry, rexBuilder, aggregateCall, project);
    final String fieldName;
    final String expression;
    final DruidExpression arg = Iterables.getOnlyElement(arguments);
    final ExprMacroTable macroTable = plannerContext.getExprMacroTable();
    final ValueType sumType;
    // Use 64-bit sum regardless of the type of the AVG aggregator.
    if (SqlTypeName.INT_TYPES.contains(aggregateCall.getType().getSqlTypeName())) {
        sumType = ValueType.LONG;
    } else {
        sumType = ValueType.DOUBLE;
    }
    if (arg.isDirectColumnAccess()) {
        fieldName = arg.getDirectColumn();
        expression = null;
    } else {
        // if the filter or anywhere else defined a virtual column for us, re-use it
        final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
        String vc = virtualColumnRegistry.getVirtualColumnByExpression(arg, resolutionArg.getType());
        fieldName = vc != null ? vc : null;
        expression = vc != null ? null : arg.getExpression();
    }
    final String sumName = Calcites.makePrefixedName(name, "sum");
    final AggregatorFactory sum = SumSqlAggregator.createSumAggregatorFactory(sumType, sumName, fieldName, expression, macroTable);
    return Aggregation.create(ImmutableList.of(sum, count), new ArithmeticPostAggregator(name, "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, sumName), new FieldAccessPostAggregator(null, countName))));
}
Also used : ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) ValueType(org.apache.druid.segment.column.ValueType) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)
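Note that no average is computed inside the aggregation itself: the planner emits a prefixed 64-bit sum aggregator and a count aggregator, and the final value comes from an ArithmeticPostAggregator applying "quotient" to the two. A minimal plain-Java sketch of that post-aggregation step, using hypothetical intermediate values rather than the Druid API:

// Hypothetical intermediate results produced by the two aggregators above.
long sum = 1250L;     // value of the prefixed "sum" aggregator
long count = 100L;    // value of the prefixed "count" aggregator

// The "quotient" arithmetic post-aggregator divides as regular floating point,
// so AVG over an integer-typed column can still return a fractional result.
double avg = (double) sum / (double) count;   // 12.5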

Example 37 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class IndexMergerV9 method makeIndexFiles.

private File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    @Nullable final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn,
    final boolean fillRowNumConversions,
    final IndexSpec indexSpec,
    @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException {
    progress.start();
    progress.progress();
    List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
    final Metadata segmentMetadata;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    Closer closer = Closer.create();
    try {
        final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
        FileUtils.mkdirp(outDir);
        SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
        log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
        SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
        closer.register(segmentWriteOutMedium);
        long startTime = System.currentTimeMillis();
        Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
        log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
        progress.progress();
        startTime = System.currentTimeMillis();
        try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
            SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
            if (customSegmentLoader != null) {
                mapper.writeValue(fos, customSegmentLoader);
            } else {
                mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
            }
        }
        log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
        progress.progress();
        final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
        final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
        final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
        mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
        final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMergerV9> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionHandler handler = handlers.get(mergedDimensions.get(i));
            mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
        }
        /**
         *********** Setup Dim Conversions *************
         */
        progress.progress();
        startTime = System.currentTimeMillis();
        writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
        log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
        /**
         *********** Walk through data sets, merge them, and write merged columns ************
         */
        progress.progress();
        final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
        closer.register(timeAndDimsIterator);
        final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
        final ArrayList<GenericColumnSerializer> metricWriters = setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
        List<IntBuffer> rowNumConversions = mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
        /**
         ********** Create Inverted Indexes and Finalize Build Columns ************
         */
        final String section = "build inverted index and columns";
        progress.startSection(section);
        makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
        makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerV9 merger = mergers.get(i);
            merger.writeIndexes(rowNumConversions);
            if (merger.canSkip()) {
                continue;
            }
            ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
            makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
        }
        progress.stopSection(section);
        /**
         *********** Make index.drd & metadata.drd files *************
         */
        progress.progress();
        makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
        makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
        v9Smoosher.close();
        progress.stop();
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : ArrayList(java.util.ArrayList) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) SegmentWriteOutMedium(org.apache.druid.segment.writeout.SegmentWriteOutMedium) FileSmoosher(org.apache.druid.java.util.common.io.smoosh.FileSmoosher) Closer(org.apache.druid.java.util.common.io.Closer) ValueType(org.apache.druid.segment.column.ValueType) MMappedQueryableSegmentizerFactory(org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory) ColumnDescriptor(org.apache.druid.segment.column.ColumnDescriptor) SegmentizerFactory(org.apache.druid.segment.loading.SegmentizerFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TreeMap(java.util.TreeMap) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File)
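One detail worth noting in the metadata step: each requested AggregatorFactory is replaced by its combining factory before the per-segment metadata is merged, because a merged segment holds already-aggregated values and further aggregation must use combining semantics. A hedged, plain-Java sketch of the idea with a hypothetical long-sum metric (illustrative only, not the Druid classes):

// Hypothetical partial sums, one per segment being merged.
long[] perSegmentSums = {120L, 87L, 45L};

// For a longSum, the combining factory is again a longSum over the partial
// results, so combining reduces to adding the pre-aggregated values.
long combined = 0L;
for (long partial : perSegmentSums) {
    combined += partial;
}
// combined == 252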

Example 38 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class IndexMergerV9 method merge.

private File merge(
    List<IndexableAdapter> indexes,
    final boolean rollup,
    final AggregatorFactory[] metricAggs,
    @Nullable DimensionsSpec dimensionsSpec,
    File outDir,
    IndexSpec indexSpec,
    ProgressIndicator progress,
    @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException {
    final List<String> mergedDimensions = IndexMerger.getMergedDimensions(indexes, dimensionsSpec);
    final List<String> mergedMetrics = IndexMerger.mergeIndexed(indexes.stream().map(IndexableAdapter::getMetricNames).collect(Collectors.toList()));
    final AggregatorFactory[] sortedMetricAggs = new AggregatorFactory[mergedMetrics.size()];
    for (AggregatorFactory metricAgg : metricAggs) {
        int metricIndex = mergedMetrics.indexOf(metricAgg.getName());
        /*
        If metricIndex is negative, one of the metricAggs was not present in the union of metrics from the indices
        we are merging
       */
        if (metricIndex > -1) {
            sortedMetricAggs[metricIndex] = metricAgg;
        }
    }
    /*
      If there is nothing at sortedMetricAggs[i], then we did not have a metricAgg whose name matched the name
      of the ith element of mergedMetrics. I.e. There was a metric in the indices to merge that we did not ask for.
     */
    for (int i = 0; i < sortedMetricAggs.length; i++) {
        if (sortedMetricAggs[i] == null) {
            throw new IAE("Indices to merge contained metric[%s], but requested metrics did not", mergedMetrics.get(i));
        }
    }
    for (int i = 0; i < mergedMetrics.size(); i++) {
        if (!sortedMetricAggs[i].getName().equals(mergedMetrics.get(i))) {
            throw new IAE("Metric mismatch, index[%d] [%s] != [%s]", i, sortedMetricAggs[i].getName(), mergedMetrics.get(i));
        }
    }
    Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn;
    if (rollup) {
        rowMergerFn = rowIterators -> new RowCombiningTimeAndDimsIterator(rowIterators, sortedMetricAggs, mergedMetrics);
    } else {
        rowMergerFn = MergingRowIterator::new;
    }
    return makeIndexFiles(indexes, sortedMetricAggs, outDir, progress, mergedDimensions, mergedMetrics, rowMergerFn, true, indexSpec, segmentWriteOutMediumFactory);
}
Also used : List(java.util.List) ArrayList(java.util.ArrayList) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE)
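The alignment loop above re-orders the requested aggregators so that sortedMetricAggs[i] matches mergedMetrics.get(i), then fails with an IAE if the indexes contain a metric that was never requested. A small standalone sketch of that alignment, using hypothetical metric names:

// Requested aggregator names in one order, merged metric names (discovered
// from the indexes) in another.
String[] requestedAggNames = {"impressions", "clicks"};
String[] mergedMetricNames = {"clicks", "impressions"};

String[] sorted = new String[mergedMetricNames.length];
for (String requested : requestedAggNames) {
    for (int i = 0; i < mergedMetricNames.length; i++) {
        if (mergedMetricNames[i].equals(requested)) {
            sorted[i] = requested;   // slot i now follows the merged metric order
        }
    }
}
// Any slot still null would mean an index carries a metric the caller did not
// request, which the real method reports as an error.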

Example 39 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class InputRowSerde method fromBytes.

public static InputRow fromBytes(final Map<String, IndexSerdeTypeHelper> typeHelperMap, byte[] data, AggregatorFactory[] aggs) {
    try {
        ByteArrayDataInput in = ByteStreams.newDataInput(data);
        // Read timestamp
        long timestamp = in.readLong();
        Map<String, Object> event = new HashMap<>();
        // Read dimensions
        List<String> dimensions = new ArrayList<>();
        int dimNum = WritableUtils.readVInt(in);
        for (int i = 0; i < dimNum; i++) {
            String dimension = readString(in);
            dimensions.add(dimension);
            IndexSerdeTypeHelper typeHelper = typeHelperMap.get(dimension);
            if (typeHelper == null) {
                typeHelper = STRING_HELPER;
            }
            Object dimValues = typeHelper.deserialize(in);
            if (dimValues == null) {
                continue;
            }
            if (typeHelper.getType() == ValueType.STRING) {
                List<String> dimensionValues = (List<String>) dimValues;
                if (dimensionValues.size() == 1) {
                    event.put(dimension, dimensionValues.get(0));
                } else {
                    event.put(dimension, dimensionValues);
                }
            } else {
                event.put(dimension, dimValues);
            }
        }
        // Read metrics
        int metricSize = WritableUtils.readVInt(in);
        for (int i = 0; i < metricSize; i++) {
            final String metric = readString(in);
            final AggregatorFactory agg = getAggregator(metric, aggs, i);
            final ColumnType type = agg.getIntermediateType();
            final byte metricNullability = in.readByte();
            if (metricNullability == NullHandling.IS_NULL_BYTE) {
                // metric value is null.
                continue;
            }
            if (type.is(ValueType.FLOAT)) {
                event.put(metric, in.readFloat());
            } else if (type.is(ValueType.LONG)) {
                event.put(metric, WritableUtils.readVLong(in));
            } else if (type.is(ValueType.DOUBLE)) {
                event.put(metric, in.readDouble());
            } else {
                ComplexMetricSerde serde = getComplexMetricSerde(agg.getIntermediateType().getComplexTypeName());
                byte[] value = readBytes(in);
                event.put(metric, serde.fromBytes(value, 0, value.length));
            }
        }
        return new MapBasedInputRow(timestamp, dimensions, event);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ComplexMetricSerde(org.apache.druid.segment.serde.ComplexMetricSerde) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ByteArrayDataInput(com.google.common.io.ByteArrayDataInput) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) List(java.util.List) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow)
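The reads above imply a compact wire layout for each serialized row; a comment-form summary follows (the encodings of helpers such as readString and readBytes are internal to InputRowSerde and not reproduced here):

// long      timestamp                                (in.readLong)
// vint      dimension count                          (WritableUtils.readVInt)
// per dim:  name, then a type-helper-encoded value   (string helper as fallback)
// vint      metric count                             (WritableUtils.readVInt)
// per metric:
//   name
//   byte    nullability flag                         (NullHandling.IS_NULL_BYTE means the value is skipped)
//   payload float / vlong / double, or raw bytes handed to the ComplexMetricSerde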

Example 40 with AggregatorFactory

use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

the class BatchDeltaIngestionTest method makeHadoopDruidIndexerConfig.

private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig(Map<String, Object> inputSpec, File tmpDir, AggregatorFactory[] aggregators) throws Exception {
    HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(
        new DataSchema(
            "website",
            MAPPER.convertValue(new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host"))), null, ImmutableList.of("timestamp", "host", "host2", "visited_num"), false, 0), null), Map.class),
            aggregators != null ? aggregators : new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_num"), new HyperUniquesAggregatorFactory("unique_hosts", "host2") },
            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
            null,
            MAPPER
        ),
        new HadoopIOConfig(inputSpec, null, tmpDir.getCanonicalPath()),
        new HadoopTuningConfig(tmpDir.getCanonicalPath(), null, null, null, null, null, null, null, null, false, false, false, false, null, false, false, null, null, false, false, null, null, null, null, null)
    ));
    config.setShardSpecs(ImmutableMap.of(INTERVAL_FULL.getStartMillis(), ImmutableList.of(new HadoopyShardSpec(new HashBasedNumberedShardSpec(0, 1, 0, 1, null, HashPartitionFunction.MURMUR3_32_ABS, HadoopDruidIndexerConfig.JSON_MAPPER), 0))));
    config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
    return config;
}
Also used : HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)
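Because the aggregators argument may be null, the helper falls back to a longSum on visited_num plus a HyperUnique sketch on host2, so a test can either accept the defaults or pass its own metrics. A hedged sketch of the two call shapes (inputSpec and tmpDir are assumed to be prepared by the surrounding test):

// Accept the default metrics ("visited_sum" and "unique_hosts").
HadoopDruidIndexerConfig withDefaults = makeHadoopDruidIndexerConfig(inputSpec, tmpDir, null);

// Or override them with an explicit metric set for the test case.
HadoopDruidIndexerConfig withCustomMetrics = makeHadoopDruidIndexerConfig(
    inputSpec,
    tmpDir,
    new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_num") }
);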

Aggregations

AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 163
Test (org.junit.Test): 85
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 56
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 48
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 42
ArrayList (java.util.ArrayList): 33
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 33
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 28
DataSchema (org.apache.druid.segment.indexing.DataSchema): 25
Nullable (javax.annotation.Nullable): 22
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 22
PostAggregator (org.apache.druid.query.aggregation.PostAggregator): 22
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 22
HashMap (java.util.HashMap): 20
List (java.util.List): 20
DoubleMaxAggregatorFactory (org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory): 18
LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory): 18
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 18
Map (java.util.Map): 17
FloatFirstAggregatorFactory (org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory): 15