
Example 46 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class ColumnProcessors method makeProcessorInternal.

/**
 * Creates "column processors", which are objects that wrap a single input column and provide some
 * functionality on top of it.
 *
 * @param inputCapabilitiesFn   function that returns capabilities of the column being processed. The type provided
 *                              by these capabilities will be used to determine what kind of selector to create. If
 *                              this function returns null, then processorFactory.defaultType() will be
 *                              used to construct a set of assumed capabilities.
 * @param dimensionSelectorFn   function that creates a DimensionSelector for the column being processed. Will be
 *                              called if the column type is string.
 * @param valueSelectorFunction function that creates a ColumnValueSelector for the column being processed. Will be
 *                              called if the column type is long, float, double, or complex.
 * @param processorFactory      object that encapsulates the knowledge about how to create processors
 * @param selectorFactory       column selector factory used for creating the selectors the processor wraps
 */
private static <T> T makeProcessorInternal(
    final Function<ColumnSelectorFactory, ColumnCapabilities> inputCapabilitiesFn,
    final Function<ColumnSelectorFactory, DimensionSelector> dimensionSelectorFn,
    final Function<ColumnSelectorFactory, ColumnValueSelector<?>> valueSelectorFunction,
    final ColumnProcessorFactory<T> processorFactory,
    final ColumnSelectorFactory selectorFactory
) {
    final ColumnCapabilities capabilities = inputCapabilitiesFn.apply(selectorFactory);
    final TypeSignature<ValueType> effectiveType = capabilities != null ? capabilities : processorFactory.defaultType();
    switch(effectiveType.getType()) {
        case STRING:
            return processorFactory.makeDimensionProcessor(dimensionSelectorFn.apply(selectorFactory), mayBeMultiValue(capabilities));
        case LONG:
            return processorFactory.makeLongProcessor(valueSelectorFunction.apply(selectorFactory));
        case FLOAT:
            return processorFactory.makeFloatProcessor(valueSelectorFunction.apply(selectorFactory));
        case DOUBLE:
            return processorFactory.makeDoubleProcessor(valueSelectorFunction.apply(selectorFactory));
        case COMPLEX:
            return processorFactory.makeComplexProcessor(valueSelectorFunction.apply(selectorFactory));
        default:
            throw new ISE("Unsupported type[%s]", effectiveType.asTypeString());
    }
}
Also used : ValueType(org.apache.druid.segment.column.ValueType) ISE(org.apache.druid.java.util.common.ISE) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)
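
The dispatch technique above — resolve an effective type signature, fall back to the factory's default when capabilities are unknown, then hand a matching selector to the corresponding factory callback — can be shown with a minimal, self-contained sketch. Everything below (MiniFactory, the Type enum, the selector description strings) is invented for illustration and only mirrors the shape of the Druid code; it is not the Druid API.

import java.util.Objects;

public class TypeDispatchSketch
{
    // Hypothetical stand-ins for Druid's type signature and processor factory.
    enum Type { STRING, LONG, DOUBLE }

    interface MiniFactory<T>
    {
        Type defaultType();                  // used when the declared type is unknown
        T makeStringProcessor(String selectorDescription);
        T makeNumericProcessor(String selectorDescription);
    }

    // Mirrors makeProcessorInternal: pick the effective type, then dispatch
    // to the factory method that matches it.
    static <T> T makeProcessor(Type declaredType, MiniFactory<T> factory)
    {
        final Type effectiveType = declaredType != null ? declaredType : factory.defaultType();
        switch (effectiveType) {
            case STRING:
                return factory.makeStringProcessor("dimension-selector");
            case LONG:
            case DOUBLE:
                return factory.makeNumericProcessor("value-selector");
            default:
                throw new IllegalStateException("Unsupported type: " + effectiveType);
        }
    }

    public static void main(String[] args)
    {
        final MiniFactory<String> factory = new MiniFactory<String>()
        {
            @Override public Type defaultType() { return Type.STRING; }
            @Override public String makeStringProcessor(String s) { return "string processor over " + Objects.requireNonNull(s); }
            @Override public String makeNumericProcessor(String s) { return "numeric processor over " + Objects.requireNonNull(s); }
        };
        // A null declared type falls back to the factory's default, as in the Druid method.
        System.out.println(makeProcessor(null, factory));      // string processor over dimension-selector
        System.out.println(makeProcessor(Type.LONG, factory)); // numeric processor over value-selector
    }
}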

Example 47 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class IndexMergerV9 method makeIndexFiles.

private File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    @Nullable final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn,
    final boolean fillRowNumConversions,
    final IndexSpec indexSpec,
    @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException {
    progress.start();
    progress.progress();
    List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
    final Metadata segmentMetadata;
    if (metricAggs != null) {
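        // getCombiningFactory() yields an aggregator factory suited to merging
        // already-aggregated partial values, which is what segment merging needs.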
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    Closer closer = Closer.create();
    try {
        final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
        FileUtils.mkdirp(outDir);
        SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
        log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
        SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
        closer.register(segmentWriteOutMedium);
        long startTime = System.currentTimeMillis();
        Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
        log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
        progress.progress();
        startTime = System.currentTimeMillis();
        try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
            SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
            if (customSegmentLoader != null) {
                mapper.writeValue(fos, customSegmentLoader);
            } else {
                mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
            }
        }
        log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
        progress.progress();
        final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
        final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
        final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
        mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
        final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMergerV9> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionHandler handler = handlers.get(mergedDimensions.get(i));
            mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
        }
        /**
         *********** Setup Dim Conversions *************
         */
        progress.progress();
        startTime = System.currentTimeMillis();
        writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
        log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
        /**
         *********** Walk through data sets, merge them, and write merged columns ************
         */
        progress.progress();
        final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
        closer.register(timeAndDimsIterator);
        final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
        final ArrayList<GenericColumnSerializer> metricWriters = setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
        List<IntBuffer> rowNumConversions = mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
        /**
         ********** Create Inverted Indexes and Finalize Build Columns ************
         */
        final String section = "build inverted index and columns";
        progress.startSection(section);
        makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
        makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerV9 merger = mergers.get(i);
            merger.writeIndexes(rowNumConversions);
            if (merger.canSkip()) {
                continue;
            }
            ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
            makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
        }
        progress.stopSection(section);
        /**
         *********** Make index.drd & metadata.drd files *************
         */
        progress.progress();
        makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
        makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
        v9Smoosher.close();
        progress.stop();
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : ArrayList(java.util.ArrayList) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) SegmentWriteOutMedium(org.apache.druid.segment.writeout.SegmentWriteOutMedium) FileSmoosher(org.apache.druid.java.util.common.io.smoosh.FileSmoosher) Closer(org.apache.druid.java.util.common.io.Closer) ValueType(org.apache.druid.segment.column.ValueType) MMappedQueryableSegmentizerFactory(org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory) ColumnDescriptor(org.apache.druid.segment.column.ColumnDescriptor) MMappedQueryableSegmentizerFactory(org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory) SegmentizerFactory(org.apache.druid.segment.loading.SegmentizerFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TreeMap(java.util.TreeMap) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File)
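
The try/catch/finally shape around the Closer above is worth calling out: rethrow() marks the original Throwable as the primary failure, so exceptions thrown while closing are recorded as suppressed rather than masking it. Below is a minimal sketch of the same idiom, assuming only Druid's Closer (which mirrors Guava's com.google.common.io.Closer API); the file names are invented.

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.druid.java.util.common.io.Closer;

public class CloserIdiomSketch
{
    public static void writeTwoFiles(File dir) throws IOException
    {
        final Closer closer = Closer.create();
        try {
            // register() returns its argument; registered resources are closed
            // in reverse registration order when closer.close() runs below.
            final FileOutputStream a = closer.register(new FileOutputStream(new File(dir, "a.bin")));
            final FileOutputStream b = closer.register(new FileOutputStream(new File(dir, "b.bin")));
            a.write(1);
            b.write(2);
        }
        catch (Throwable t) {
            // rethrow() records t as the primary exception; anything thrown
            // during close() becomes a suppressed exception instead of
            // replacing it.
            throw closer.rethrow(t);
        }
        finally {
            closer.close();
        }
    }
}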

Example 48 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class IndexMergerV9 method makeDimensionHandlers.

private Map<String, DimensionHandler> makeDimensionHandlers(final List<String> mergedDimensions, final List<ColumnCapabilities> dimCapabilities) {
    Map<String, DimensionHandler> handlers = new LinkedHashMap<>();
    for (int i = 0; i < mergedDimensions.size(); i++) {
        ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot(dimCapabilities.get(i), DIMENSION_CAPABILITY_MERGE_LOGIC);
        String dimName = mergedDimensions.get(i);
        DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null);
        handlers.put(dimName, handler);
    }
    return handlers;
}
Also used : ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) LinkedHashMap(java.util.LinkedHashMap)
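
The LinkedHashMap here is load-bearing: it keeps handler iteration order aligned with mergedDimensions, which callers such as makeIndexFiles above rely on when they walk the handlers alongside the merged dimension list. A self-contained sketch of that ordering property (the handler strings are placeholders):

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class HandlerOrderSketch
{
    public static void main(String[] args)
    {
        final List<String> mergedDimensions = Arrays.asList("country", "device", "page");
        final Map<String, String> handlers = new LinkedHashMap<>();
        for (String dim : mergedDimensions) {
            handlers.put(dim, "handler-for-" + dim);
        }
        // LinkedHashMap iterates in insertion order, so position i of
        // mergedDimensions lines up with the i-th entry of the map.
        System.out.println(handlers.keySet()); // [country, device, page]
    }
}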

Example 49 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class QueryableIndexColumnSelectorFactory method makeDimensionSelectorUndecorated.

private DimensionSelector makeDimensionSelectorUndecorated(DimensionSpec dimensionSpec) {
    final String dimension = dimensionSpec.getDimension();
    final ExtractionFn extractionFn = dimensionSpec.getExtractionFn();
    final ColumnHolder columnHolder = index.getColumnHolder(dimension);
    if (columnHolder == null) {
        return DimensionSelector.constant(null, extractionFn);
    }
    if (dimension.equals(ColumnHolder.TIME_COLUMN_NAME)) {
        return new SingleScanTimeDimensionSelector(makeColumnValueSelector(dimension), extractionFn, descending);
    }
    ColumnCapabilities capabilities = columnHolder.getCapabilities();
    if (capabilities.isNumeric()) {
        return ValueTypes.makeNumericWrappingDimensionSelector(capabilities.getType(), makeColumnValueSelector(dimension), extractionFn);
    }
    final DictionaryEncodedColumn column = getCachedColumn(dimension, DictionaryEncodedColumn.class);
    if (column != null) {
        return column.makeDimensionSelector(offset, extractionFn);
    } else {
        return DimensionSelector.constant(null, extractionFn);
    }
}
Also used : ExtractionFn(org.apache.druid.query.extraction.ExtractionFn) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) DictionaryEncodedColumn(org.apache.druid.segment.column.DictionaryEncodedColumn) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)
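
The method above resolves a selector through a fixed precedence: missing column first, then the time column, then numeric wrapping, then the dictionary-encoded string column, and finally a constant-null fallback. The self-contained sketch below mirrors only that precedence; the Selector interface and the type strings are invented for illustration.

import java.util.HashMap;
import java.util.Map;

public class SelectorPrecedenceSketch
{
    interface Selector
    {
        String describe();
    }

    // Mirrors the resolution order above: missing column, time column,
    // numeric wrapping, dictionary-encoded string, constant null.
    static Selector resolve(String dimension, Map<String, String> columnTypes)
    {
        final String type = columnTypes.get(dimension);
        if (type == null) {
            return () -> null;                           // unknown column: constant-null selector
        }
        if ("__time".equals(dimension)) {
            return () -> "time selector";                // the time column is special-cased
        }
        if ("LONG".equals(type) || "FLOAT".equals(type) || "DOUBLE".equals(type)) {
            return () -> "numeric wrapping selector";    // numeric columns get a wrapping selector
        }
        if ("STRING".equals(type)) {
            return () -> "dictionary-encoded selector";  // strings read the dictionary-encoded column
        }
        return () -> null;                               // anything else degrades to constant null
    }

    public static void main(String[] args)
    {
        final Map<String, String> types = new HashMap<>();
        types.put("__time", "LONG");
        types.put("country", "STRING");
        types.put("price", "DOUBLE");
        System.out.println(resolve("__time", types).describe());  // time selector
        System.out.println(resolve("country", types).describe()); // dictionary-encoded selector
        System.out.println(resolve("price", types).describe());   // numeric wrapping selector
    }
}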

Example 50 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class IncrementalIndexAdapter method processRows.

/**
 * Sometimes it is hard to tell whether a dimension contains a null value. If a dimension
 * explicitly stored a null or empty value, then yes, it contains null. But if all of a
 * dimension's stored values are non-null, it is still too early to conclude that the
 * dimension contains no nulls. Consider a two-row case where the first row has "dimA=1" and
 * "dimB=2", while the second row has only "dimA=3". dimB's stored values are "2" and never
 * explicitly null or empty, yet once the two rows are combined, dimB is null in row 2. So we
 * must iterate over all rows to determine whether a dimension contains a null value.
 */
private void processRows(IncrementalIndex index, BitmapFactory bitmapFactory, List<IncrementalIndex.DimensionDesc> dimensions) {
    int rowNum = 0;
    for (IncrementalIndexRow row : index.getFacts().persistIterable()) {
        final Object[] dims = row.getDims();
        for (IncrementalIndex.DimensionDesc dimension : dimensions) {
            final int dimIndex = dimension.getIndex();
            DimensionAccessor accessor = accessors.get(dimension.getName());
            // Add 'null' to the dimension's dictionary.
            if (dimIndex >= dims.length || dims[dimIndex] == null) {
                accessor.indexer.processRowValsToUnsortedEncodedKeyComponent(null, true);
                continue;
            }
            final ColumnCapabilities capabilities = dimension.getCapabilities();
            if (capabilities.hasBitmapIndexes()) {
                final MutableBitmap[] bitmapIndexes = accessor.invertedIndexes;
                final DimensionIndexer indexer = accessor.indexer;
                indexer.fillBitmapsFromUnsortedEncodedKeyComponent(dims[dimIndex], rowNum, bitmapIndexes, bitmapFactory);
            }
        }
        ++rowNum;
    }
}
Also used : DimensionIndexer(org.apache.druid.segment.DimensionIndexer) MutableBitmap(org.apache.druid.collections.bitmap.MutableBitmap) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)
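
The javadoc's two-row example is easy to reproduce in isolation: the `dimIndex >= dims.length || dims[dimIndex] == null` check above catches both an explicitly stored null and a row whose dims array is simply too short to contain the dimension. A self-contained sketch with invented row data:

import java.util.Arrays;
import java.util.List;

public class ImplicitNullsSketch
{
    public static void main(String[] args)
    {
        // Row layout: index 0 = dimA, index 1 = dimB. The second row never
        // stored dimB, so its dims array is shorter than the dimension count.
        final List<Object[]> rows = Arrays.asList(
            new Object[]{"1", "2"},   // dimA=1, dimB=2
            new Object[]{"3"}         // dimA=3, dimB implicitly null
        );
        final int dimBIndex = 1;
        boolean dimBHasNull = false;
        for (Object[] dims : rows) {
            if (dimBIndex >= dims.length || dims[dimBIndex] == null) {
                dimBHasNull = true;   // found a row where dimB is absent
            }
        }
        System.out.println("dimB contains null: " + dimBHasNull); // true
    }
}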

Aggregations

ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities) : 156
Test (org.junit.Test) : 104
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) : 58
ColumnCapabilitiesImpl (org.apache.druid.segment.column.ColumnCapabilitiesImpl) : 18
ArrayList (java.util.ArrayList) : 8
Nullable (javax.annotation.Nullable) : 8
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory) : 8
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec) : 8
ColumnValueSelector (org.apache.druid.segment.ColumnValueSelector) : 8
ColumnHolder (org.apache.druid.segment.column.ColumnHolder) : 8
ColumnType (org.apache.druid.segment.column.ColumnType) : 8
RowSignature (org.apache.druid.segment.column.RowSignature) : 8
ValueType (org.apache.druid.segment.column.ValueType) : 7
List (java.util.List) : 6
NullHandlingTest (org.apache.druid.common.config.NullHandlingTest) : 6
Pair (org.apache.druid.java.util.common.Pair) : 6
Expr (org.apache.druid.math.expr.Expr) : 6
ExpressionType (org.apache.druid.math.expr.ExpressionType) : 6
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec) : 6
DimensionIndexer (org.apache.druid.segment.DimensionIndexer) : 6