Example 1 with ValueType

Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.

Class AvgSqlAggregator, method toDruidAggregation:

@Nullable
@Override
public Aggregation toDruidAggregation(
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    final VirtualColumnRegistry virtualColumnRegistry,
    final RexBuilder rexBuilder,
    final String name,
    final AggregateCall aggregateCall,
    final Project project,
    final List<Aggregation> existingAggregations,
    final boolean finalizeAggregations
) {
    final List<DruidExpression> arguments = Aggregations.getArgumentsForSimpleAggregator(plannerContext, rowSignature, aggregateCall, project);
    if (arguments == null) {
        return null;
    }
    final String countName = Calcites.makePrefixedName(name, "count");
    final AggregatorFactory count = CountSqlAggregator.createCountAggregatorFactory(countName, plannerContext, rowSignature, virtualColumnRegistry, rexBuilder, aggregateCall, project);
    final String fieldName;
    final String expression;
    final DruidExpression arg = Iterables.getOnlyElement(arguments);
    final ExprMacroTable macroTable = plannerContext.getExprMacroTable();
    final ValueType sumType;
    // Use 64-bit sum regardless of the type of the AVG aggregator.
    if (SqlTypeName.INT_TYPES.contains(aggregateCall.getType().getSqlTypeName())) {
        sumType = ValueType.LONG;
    } else {
        sumType = ValueType.DOUBLE;
    }
    if (arg.isDirectColumnAccess()) {
        fieldName = arg.getDirectColumn();
        expression = null;
    } else {
        // If a virtual column was already defined for this expression (e.g., by a filter), re-use it.
        final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
        final String vc = virtualColumnRegistry.getVirtualColumnByExpression(arg, resolutionArg.getType());
        fieldName = vc;
        expression = vc != null ? null : arg.getExpression();
    }
    final String sumName = Calcites.makePrefixedName(name, "sum");
    final AggregatorFactory sum = SumSqlAggregator.createSumAggregatorFactory(sumType, sumName, fieldName, expression, macroTable);
    return Aggregation.create(
        ImmutableList.of(sum, count),
        new ArithmeticPostAggregator(
            name,
            "quotient",
            ImmutableList.of(
                new FieldAccessPostAggregator(null, sumName),
                new FieldAccessPostAggregator(null, countName)
            )
        )
    );
}
Also used : ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) ValueType(org.apache.druid.segment.column.ValueType) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)
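
For context, here is a minimal standalone sketch of the plan shape this method builds for AVG(x): two native aggregators (a 64-bit sum and a count) combined at query time by a "quotient" arithmetic post-aggregator. The column name "x" and the output prefix "a0" are hypothetical stand-ins, not values taken from the code above.

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

public class AvgPlanSketch {

    public static void main(String[] args) {
        // The two pieces AVG(x) decomposes into ("a0" is a hypothetical prefix).
        AggregatorFactory sum = new LongSumAggregatorFactory("a0:sum", "x");
        AggregatorFactory count = new CountAggregatorFactory("a0:count");
        // AVG(x) is then computed per result row as sum / count.
        ArithmeticPostAggregator avg = new ArithmeticPostAggregator(
            "a0",
            "quotient",
            ImmutableList.of(
                new FieldAccessPostAggregator(null, "a0:sum"),
                new FieldAccessPostAggregator(null, "a0:count")
            )
        );
        System.out.println(ImmutableList.of(sum, count) + " -> " + avg);
    }
}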

Example 2 with ValueType

Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.

Class ColumnProcessors, method makeProcessorInternal:

/**
 * Creates "column processors", which are objects that wrap a single input column and provide some
 * functionality on top of it.
 *
 * @param inputCapabilitiesFn   function that returns capabilities of the column being processed. The type provided
 *                              by these capabilities determines what kind of selector to create. If this function
 *                              returns null, processorFactory.defaultType() is used as the assumed type of the
 *                              column.
 * @param dimensionSelectorFn   function that creates a DimensionSelector for the column being processed. Will be
 *                              called if the column type is string.
 * @param valueSelectorFunction function that creates a ColumnValueSelector for the column being processed. Will be
 *                              called if the column type is long, float, double, or complex.
 * @param processorFactory      object that encapsulates the knowledge about how to create processors
 * @param selectorFactory       column selector factory used for creating the processor
 */
private static <T> T makeProcessorInternal(
    final Function<ColumnSelectorFactory, ColumnCapabilities> inputCapabilitiesFn,
    final Function<ColumnSelectorFactory, DimensionSelector> dimensionSelectorFn,
    final Function<ColumnSelectorFactory, ColumnValueSelector<?>> valueSelectorFunction,
    final ColumnProcessorFactory<T> processorFactory,
    final ColumnSelectorFactory selectorFactory
) {
    final ColumnCapabilities capabilities = inputCapabilitiesFn.apply(selectorFactory);
    final TypeSignature<ValueType> effectiveType = capabilities != null ? capabilities : processorFactory.defaultType();
    switch (effectiveType.getType()) {
        case STRING:
            return processorFactory.makeDimensionProcessor(dimensionSelectorFn.apply(selectorFactory), mayBeMultiValue(capabilities));
        case LONG:
            return processorFactory.makeLongProcessor(valueSelectorFunction.apply(selectorFactory));
        case FLOAT:
            return processorFactory.makeFloatProcessor(valueSelectorFunction.apply(selectorFactory));
        case DOUBLE:
            return processorFactory.makeDoubleProcessor(valueSelectorFunction.apply(selectorFactory));
        case COMPLEX:
            return processorFactory.makeComplexProcessor(valueSelectorFunction.apply(selectorFactory));
        default:
            throw new ISE("Unsupported type[%s]", effectiveType.asTypeString());
    }
}
Also used : ValueType(org.apache.druid.segment.column.ValueType) ISE(org.apache.druid.java.util.common.ISE) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)
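
To make the dispatch concrete, here is a hypothetical ColumnProcessorFactory that maps each column type to a short description. This is a sketch assuming the interface shape implied by the switch above (defaultType() plus one make*Processor method per branch); the selector parameter types follow recent Druid and should be treated as assumptions, since they vary across versions.

import org.apache.druid.segment.BaseDoubleColumnValueSelector;
import org.apache.druid.segment.BaseFloatColumnValueSelector;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnProcessorFactory;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.column.ColumnType;

// Hypothetical factory: each processor just reports the column type it saw.
public class DescribingProcessorFactory implements ColumnProcessorFactory<String> {

    @Override
    public ColumnType defaultType() {
        // Assumed type when the column's capabilities are unknown.
        return ColumnType.STRING;
    }

    @Override
    public String makeDimensionProcessor(DimensionSelector selector, boolean multiValue) {
        return multiValue ? "multi-value string" : "string";
    }

    @Override
    public String makeFloatProcessor(BaseFloatColumnValueSelector selector) {
        return "float";
    }

    @Override
    public String makeDoubleProcessor(BaseDoubleColumnValueSelector selector) {
        return "double";
    }

    @Override
    public String makeLongProcessor(BaseLongColumnValueSelector selector) {
        return "long";
    }

    @Override
    public String makeComplexProcessor(BaseObjectColumnValueSelector<?> selector) {
        return "complex";
    }
}

A factory like this would be handed to ColumnProcessors together with a column name and a ColumnSelectorFactory, which routes through makeProcessorInternal above.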

Example 3 with ValueType

Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.

Class ColumnValueGenerator, method generateSingleRowValue:

private Object generateSingleRowValue() {
    Object ret = null;
    ValueType type = schema.getType();
    if (distribution instanceof AbstractIntegerDistribution) {
        ret = ((AbstractIntegerDistribution) distribution).sample();
    } else if (distribution instanceof AbstractRealDistribution) {
        ret = ((AbstractRealDistribution) distribution).sample();
    } else if (distribution instanceof EnumeratedDistribution) {
        ret = ((EnumeratedDistribution) distribution).sample();
    }
    ret = convertType(ret, type);
    return ret;
}
Also used : AbstractRealDistribution(org.apache.commons.math3.distribution.AbstractRealDistribution) ValueType(org.apache.druid.segment.column.ValueType) EnumeratedDistribution(org.apache.commons.math3.distribution.EnumeratedDistribution) AbstractIntegerDistribution(org.apache.commons.math3.distribution.AbstractIntegerDistribution)
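
The dispatch above can be exercised directly against Apache Commons Math 3, which is where these distribution classes come from. A standalone sketch with arbitrary example parameters:

import java.util.Arrays;
import org.apache.commons.math3.distribution.AbstractIntegerDistribution;
import org.apache.commons.math3.distribution.AbstractRealDistribution;
import org.apache.commons.math3.distribution.EnumeratedDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.ZipfDistribution;
import org.apache.commons.math3.util.Pair;

public class DistributionSampleSketch {

    // Same instanceof-based dispatch as generateSingleRowValue: integer-valued
    // distributions sample to int, real-valued ones to double, and enumerated
    // ones to whatever element type they were built with.
    static Object sample(Object distribution) {
        if (distribution instanceof AbstractIntegerDistribution) {
            return ((AbstractIntegerDistribution) distribution).sample();
        } else if (distribution instanceof AbstractRealDistribution) {
            return ((AbstractRealDistribution) distribution).sample();
        } else if (distribution instanceof EnumeratedDistribution) {
            return ((EnumeratedDistribution<?>) distribution).sample();
        }
        return null;
    }

    public static void main(String[] args) {
        System.out.println(sample(new ZipfDistribution(100, 1.5)));   // Integer
        System.out.println(sample(new NormalDistribution(0, 1)));     // Double
        System.out.println(sample(new EnumeratedDistribution<>(
            Arrays.asList(new Pair<>("a", 0.5), new Pair<>("b", 0.5)) // String
        )));
    }
}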

Example 4 with ValueType

Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.

Class ColumnValueGenerator, method initDistribution:

private void initDistribution() {
    GeneratorColumnSchema.ValueDistribution distributionType = schema.getDistributionType();
    ValueType type = schema.getType();
    List<Object> enumeratedValues = schema.getEnumeratedValues();
    List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities();
    List<Pair<Object, Double>> probabilities = new ArrayList<>();
    switch (distributionType) {
        case SEQUENTIAL:
            // Not random: cycle through numbers from start to end, or through enumerated values if provided.
            distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(), schema.getEnumeratedValues());
            break;
        case UNIFORM:
            distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble());
            break;
        case DISCRETE_UNIFORM:
            if (enumeratedValues == null) {
                enumeratedValues = new ArrayList<>();
                for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) {
                    Object val = convertType(i, type);
                    enumeratedValues.add(val);
                }
            }
            // Give each value equal weight; the library normalizes probabilities so they sum to 1.0.
            for (Object enumeratedValue : enumeratedValues) {
                probabilities.add(new Pair<>(enumeratedValue, 0.1));
            }
            distribution = new EnumeratedTreeDistribution<>(probabilities);
            break;
        case NORMAL:
            distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
            break;
        case ROUNDED_NORMAL:
            NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
            distribution = new RealRoundingDistribution(normalDist);
            break;
        case ZIPF:
            int cardinality;
            if (enumeratedValues == null) {
                Integer startInt = schema.getStartInt();
                cardinality = schema.getEndInt() - startInt;
                ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent());
                for (int i = 0; i < cardinality; i++) {
                    probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i)));
                }
            } else {
                cardinality = enumeratedValues.size();
                ZipfDistribution zipf = new ZipfDistribution(enumeratedValues.size(), schema.getZipfExponent());
                for (int i = 0; i < cardinality; i++) {
                    probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i)));
                }
            }
            distribution = new EnumeratedTreeDistribution<>(probabilities);
            break;
        case LAZY_ZIPF:
            int lazyCardinality;
            Integer startInt = schema.getStartInt();
            lazyCardinality = schema.getEndInt() - startInt;
            distribution = new ZipfDistribution(lazyCardinality, schema.getZipfExponent());
            break;
        case LAZY_DISCRETE_UNIFORM:
            distribution = new UniformIntegerDistribution(schema.getStartInt(), schema.getEndInt());
            break;
        case ENUMERATED:
            for (int i = 0; i < enumeratedValues.size(); i++) {
                probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i)));
            }
            distribution = new EnumeratedTreeDistribution<>(probabilities);
            break;
        default:
            throw new UnsupportedOperationException("Unknown distribution type: " + distributionType);
    }
    if (distribution instanceof AbstractIntegerDistribution) {
        ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof AbstractRealDistribution) {
        ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed);
    } else {
        ((EnumeratedDistribution) distribution).reseedRandomGenerator(seed);
    }
}
Also used : ValueType(org.apache.druid.segment.column.ValueType) ArrayList(java.util.ArrayList) UniformRealDistribution(org.apache.commons.math3.distribution.UniformRealDistribution) AbstractIntegerDistribution(org.apache.commons.math3.distribution.AbstractIntegerDistribution) AbstractRealDistribution(org.apache.commons.math3.distribution.AbstractRealDistribution) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) ZipfDistribution(org.apache.commons.math3.distribution.ZipfDistribution) UniformIntegerDistribution(org.apache.commons.math3.distribution.UniformIntegerDistribution) Pair(org.apache.commons.math3.util.Pair)
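
The DISCRETE_UNIFORM branch leans on the behavior noted in its comment: Commons Math normalizes the supplied weights, so giving every value the same weight (0.1 above) yields a uniform distribution. A quick standalone check of that assumption:

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.distribution.EnumeratedDistribution;
import org.apache.commons.math3.util.Pair;

public class NormalizationSketch {

    public static void main(String[] args) {
        List<Pair<Integer, Double>> probabilities = new ArrayList<>();
        for (int i = 0; i < 4; i++) {
            probabilities.add(new Pair<>(i, 0.1)); // weights sum to 0.4, not 1.0
        }
        EnumeratedDistribution<Integer> dist = new EnumeratedDistribution<>(probabilities);
        dist.reseedRandomGenerator(42);
        // getPmf() returns the normalized probabilities: 0.25 for each value.
        for (Pair<Integer, Double> p : dist.getPmf()) {
            System.out.println(p.getFirst() + " -> " + p.getSecond());
        }
    }
}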

Example 5 with ValueType

Use of org.apache.druid.segment.column.ValueType in project druid by druid-io.

Class IndexMergerV9, method makeIndexFiles:

private File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    @Nullable final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn,
    final boolean fillRowNumConversions,
    final IndexSpec indexSpec,
    @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException {
    progress.start();
    progress.progress();
    List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
    final Metadata segmentMetadata;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    Closer closer = Closer.create();
    try {
        final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
        FileUtils.mkdirp(outDir);
        SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
        log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
        SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
        closer.register(segmentWriteOutMedium);
        long startTime = System.currentTimeMillis();
        Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
        log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
        progress.progress();
        startTime = System.currentTimeMillis();
        try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
            SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
            if (customSegmentLoader != null) {
                mapper.writeValue(fos, customSegmentLoader);
            } else {
                mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
            }
        }
        log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
        progress.progress();
        final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
        final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
        final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
        mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
        final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMergerV9> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionHandler handler = handlers.get(mergedDimensions.get(i));
            mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
        }
        /**
         *********** Setup Dim Conversions *************
         */
        progress.progress();
        startTime = System.currentTimeMillis();
        writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
        log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
        /**
         *********** Walk through data sets, merge them, and write merged columns ************
         */
        progress.progress();
        final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
        closer.register(timeAndDimsIterator);
        final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
        final ArrayList<GenericColumnSerializer> metricWriters = setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
        List<IntBuffer> rowNumConversions = mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
        /**
         ********** Create Inverted Indexes and Finalize Build Columns ************
         */
        final String section = "build inverted index and columns";
        progress.startSection(section);
        makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
        makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerV9 merger = mergers.get(i);
            merger.writeIndexes(rowNumConversions);
            if (merger.canSkip()) {
                continue;
            }
            ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
            makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
        }
        progress.stopSection(section);
        /**
         *********** Make index.drd & metadata.drd files *************
         */
        progress.progress();
        makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
        makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
        v9Smoosher.close();
        progress.stop();
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : ArrayList(java.util.ArrayList) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) SegmentWriteOutMedium(org.apache.druid.segment.writeout.SegmentWriteOutMedium) FileSmoosher(org.apache.druid.java.util.common.io.smoosh.FileSmoosher) Closer(org.apache.druid.java.util.common.io.Closer) ValueType(org.apache.druid.segment.column.ValueType) MMappedQueryableSegmentizerFactory(org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory) ColumnDescriptor(org.apache.druid.segment.column.ColumnDescriptor) MMappedQueryableSegmentizerFactory(org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory) SegmentizerFactory(org.apache.druid.segment.loading.SegmentizerFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TreeMap(java.util.TreeMap) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File)
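
Beyond the segment-writing details, the resource-management idiom framing this method is worth isolating: resources are registered with a Closer as they are created, a failure anywhere is rethrown through the Closer so close-time exceptions do not mask it, and close() runs unconditionally. A minimal sketch of the idiom with a stand-in resource:

import java.io.Closeable;
import java.io.IOException;
import org.apache.druid.java.util.common.io.Closer;

public class CloserSketch {

    public static void run() throws IOException {
        Closer closer = Closer.create();
        try {
            // register() returns its argument, so creation and registration compose.
            Closeable resource = closer.register(() -> System.out.println("closed"));
            // ... use the resource; any Throwable thrown here propagates intact ...
        } catch (Throwable t) {
            // rethrow() preserves and re-throws the original failure.
            throw closer.rethrow(t);
        } finally {
            // Closes everything registered, in reverse registration order.
            closer.close();
        }
    }
}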

Aggregations

ValueType (org.apache.druid.segment.column.ValueType) 8
ISE (org.apache.druid.java.util.common.ISE) 3
ArrayList (java.util.ArrayList) 2
AbstractIntegerDistribution (org.apache.commons.math3.distribution.AbstractIntegerDistribution) 2
AbstractRealDistribution (org.apache.commons.math3.distribution.AbstractRealDistribution) 2
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory) 2
ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities) 2
ColumnDescriptor (org.apache.druid.segment.column.ColumnDescriptor) 2
ComplexMetricSerde (org.apache.druid.segment.serde.ComplexMetricSerde) 2
File (java.io.File) 1
FileOutputStream (java.io.FileOutputStream) 1
IntBuffer (java.nio.IntBuffer) 1
TreeMap (java.util.TreeMap) 1
Nullable (javax.annotation.Nullable) 1
RexNode (org.apache.calcite.rex.RexNode) 1
EnumeratedDistribution (org.apache.commons.math3.distribution.EnumeratedDistribution) 1
NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution) 1
UniformIntegerDistribution (org.apache.commons.math3.distribution.UniformIntegerDistribution) 1
UniformRealDistribution (org.apache.commons.math3.distribution.UniformRealDistribution) 1
ZipfDistribution (org.apache.commons.math3.distribution.ZipfDistribution) 1