Search in sources :

Example 36 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by druid-io.

the class DimensionHandlerUtilsTest method testGetHandlerFromDoubleCapabilities.

@Test
public void testGetHandlerFromDoubleCapabilities() {
    ColumnCapabilities capabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.DOUBLE);
    DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(DIM_NAME, capabilities, null);
    Assert.assertTrue(handler instanceof DoubleDimensionHandler);
    Assert.assertTrue(handler.getDimensionSchema(capabilities) instanceof DoubleDimensionSchema);
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 37 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by druid-io.

the class IndexMergerV9 method makeDimensionHandlers.

private Map<String, DimensionHandler> makeDimensionHandlers(final List<String> mergedDimensions, final List<ColumnCapabilities> dimCapabilities) {
    Map<String, DimensionHandler> handlers = new LinkedHashMap<>();
    for (int i = 0; i < mergedDimensions.size(); i++) {
        ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot(dimCapabilities.get(i), DIMENSION_CAPABILITY_MERGE_LOGIC);
        String dimName = mergedDimensions.get(i);
        DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null);
        handlers.put(dimName, handler);
    }
    return handlers;
}
Also used : ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) LinkedHashMap(java.util.LinkedHashMap)

Example 38 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by druid-io.

the class IndexMergerV9 method makeIndexFiles.

private File makeIndexFiles(final List<IndexableAdapter> adapters, @Nullable final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn, final boolean fillRowNumConversions, final IndexSpec indexSpec, @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory) throws IOException {
    progress.start();
    progress.progress();
    List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
    final Metadata segmentMetadata;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    Closer closer = Closer.create();
    try {
        final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
        FileUtils.mkdirp(outDir);
        SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
        log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
        SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
        closer.register(segmentWriteOutMedium);
        long startTime = System.currentTimeMillis();
        Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
        log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
        progress.progress();
        startTime = System.currentTimeMillis();
        try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
            SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
            if (customSegmentLoader != null) {
                mapper.writeValue(fos, customSegmentLoader);
            } else {
                mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
            }
        }
        log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
        progress.progress();
        final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
        final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
        final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
        mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
        final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMergerV9> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionHandler handler = handlers.get(mergedDimensions.get(i));
            mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
        }
        /**
         *********** Setup Dim Conversions *************
         */
        progress.progress();
        startTime = System.currentTimeMillis();
        writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
        log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
        /**
         *********** Walk through data sets, merge them, and write merged columns ************
         */
        progress.progress();
        final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
        closer.register(timeAndDimsIterator);
        final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
        final ArrayList<GenericColumnSerializer> metricWriters = setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
        List<IntBuffer> rowNumConversions = mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
        /**
         ********** Create Inverted Indexes and Finalize Build Columns ************
         */
        final String section = "build inverted index and columns";
        progress.startSection(section);
        makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
        makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerV9 merger = mergers.get(i);
            merger.writeIndexes(rowNumConversions);
            if (merger.canSkip()) {
                continue;
            }
            ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
            makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
        }
        progress.stopSection(section);
        /**
         *********** Make index.drd & metadata.drd files *************
         */
        progress.progress();
        makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
        makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
        v9Smoosher.close();
        progress.stop();
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : ArrayList(java.util.ArrayList) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) SegmentWriteOutMedium(org.apache.druid.segment.writeout.SegmentWriteOutMedium) FileSmoosher(org.apache.druid.java.util.common.io.smoosh.FileSmoosher) Closer(org.apache.druid.java.util.common.io.Closer) ValueType(org.apache.druid.segment.column.ValueType) MMappedQueryableSegmentizerFactory(org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory) ColumnDescriptor(org.apache.druid.segment.column.ColumnDescriptor) MMappedQueryableSegmentizerFactory(org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory) SegmentizerFactory(org.apache.druid.segment.loading.SegmentizerFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TreeMap(java.util.TreeMap) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File)

Example 39 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by druid-io.

the class RowBasedGrouperHelper method createResultRowBasedColumnSelectorFactory.

/**
 * Creates a {@link ColumnSelectorFactory} that can read rows which originate as results of the provided "query".
 *
 * @param query        a groupBy query
 * @param supplier     supplier of result rows from the query
 * @param finalization whether the column capabilities reported by this factory should reflect finalized types
 */
public static ColumnSelectorFactory createResultRowBasedColumnSelectorFactory(final GroupByQuery query, final Supplier<ResultRow> supplier, final RowSignature.Finalization finalization) {
    final RowSignature signature = query.getResultRowSignature(finalization);
    final RowAdapter<ResultRow> adapter = new RowAdapter<ResultRow>() {

        @Override
        public ToLongFunction<ResultRow> timestampFunction() {
            if (query.getResultRowHasTimestamp()) {
                return row -> row.getLong(0);
            } else {
                final long timestamp = query.getUniversalTimestamp().getMillis();
                return row -> timestamp;
            }
        }

        @Override
        public Function<ResultRow, Object> columnFunction(final String columnName) {
            final int columnIndex = signature.indexOf(columnName);
            if (columnIndex < 0) {
                return row -> null;
            } else {
                return row -> row.get(columnIndex);
            }
        }
    };
    // Decorate "signature" so that it returns hasMultipleValues = false. (groupBy does not return multiple values.)
    final ColumnInspector decoratedSignature = new ColumnInspector() {

        @Nullable
        @Override
        public ColumnCapabilities getColumnCapabilities(String column) {
            final ColumnCapabilities baseCapabilities = signature.getColumnCapabilities(column);
            if (baseCapabilities == null || baseCapabilities.hasMultipleValues().isFalse()) {
                return baseCapabilities;
            } else {
                return ColumnCapabilitiesImpl.copyOf(baseCapabilities).setHasMultipleValues(false);
            }
        }
    };
    return RowBasedColumnSelectorFactory.create(adapter, supplier::get, decoratedSignature, false);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) IntArrayUtils(org.apache.druid.common.utils.IntArrayUtils) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) AllGranularity(org.apache.druid.java.util.common.granularity.AllGranularity) IndexedInts(org.apache.druid.segment.data.IndexedInts) ByteBuffer(java.nio.ByteBuffer) Pair(org.apache.druid.java.util.common.Pair) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnSelectorStrategyFactory(org.apache.druid.query.dimension.ColumnSelectorStrategyFactory) JsonValue(com.fasterxml.jackson.annotation.JsonValue) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) BufferComparator(org.apache.druid.query.groupby.epinephelinae.Grouper.BufferComparator) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) IAE(org.apache.druid.java.util.common.IAE) ToLongFunction(java.util.function.ToLongFunction) Longs(com.google.common.primitives.Longs) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) ResultRow(org.apache.druid.query.groupby.ResultRow) Predicate(java.util.function.Predicate) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) Collectors(java.util.stream.Collectors) List(java.util.List) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) BooleanValueMatcher(org.apache.druid.segment.filter.BooleanValueMatcher) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BaseDoubleColumnValueSelector(org.apache.druid.segment.BaseDoubleColumnValueSelector) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) Accumulator(org.apache.druid.java.util.common.guava.Accumulator) IntStream(java.util.stream.IntStream) ColumnSelectorPlus(org.apache.druid.query.ColumnSelectorPlus) ComparableList(org.apache.druid.segment.data.ComparableList) Supplier(com.google.common.base.Supplier) BaseQuery(org.apache.druid.query.BaseQuery) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) ColumnSelectorStrategy(org.apache.druid.query.dimension.ColumnSelectorStrategy) StringComparators(org.apache.druid.query.ordering.StringComparators) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) DimensionSelector(org.apache.druid.segment.DimensionSelector) Nullable(javax.annotation.Nullable) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) ColumnInspector(org.apache.druid.segment.ColumnInspector) StringComparator(org.apache.druid.query.ordering.StringComparator) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) DateTime(org.joda.time.DateTime) Ints(com.google.common.primitives.Ints) BaseLongColumnValueSelector(org.apache.druid.segment.BaseLongColumnValueSelector) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) Closeable(java.io.Closeable) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) ColumnType(org.apache.druid.segment.column.ColumnType) Preconditions(com.google.common.base.Preconditions) BitSet(java.util.BitSet) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) Comparator(java.util.Comparator) Filters(org.apache.druid.segment.filter.Filters) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) Filter(org.apache.druid.query.filter.Filter) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnInspector(org.apache.druid.segment.ColumnInspector) RowSignature(org.apache.druid.segment.column.RowSignature) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)

Example 40 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by druid-io.

the class ApproximateHistogramVectorAggregatorTest method setup.

@Before
public void setup() {
    NullHandling.initializeForTests();
    VectorValueSelector vectorValueSelector_1 = createMock(VectorValueSelector.class);
    expect(vectorValueSelector_1.getFloatVector()).andReturn(FLOATS).anyTimes();
    expect(vectorValueSelector_1.getNullVector()).andReturn(NULL_VECTOR).anyTimes();
    VectorValueSelector vectorValueSelector_2 = createMock(VectorValueSelector.class);
    expect(vectorValueSelector_2.getFloatVector()).andReturn(FLOATS).anyTimes();
    expect(vectorValueSelector_2.getNullVector()).andReturn(null).anyTimes();
    EasyMock.replay(vectorValueSelector_1);
    EasyMock.replay(vectorValueSelector_2);
    ColumnCapabilities columnCapabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.DOUBLE);
    vectorColumnSelectorFactory = createMock(VectorColumnSelectorFactory.class);
    expect(vectorColumnSelectorFactory.getColumnCapabilities("field_1")).andReturn(columnCapabilities).anyTimes();
    expect(vectorColumnSelectorFactory.makeValueSelector("field_1")).andReturn(vectorValueSelector_1).anyTimes();
    expect(vectorColumnSelectorFactory.getColumnCapabilities("field_2")).andReturn(columnCapabilities).anyTimes();
    expect(vectorColumnSelectorFactory.makeValueSelector("field_2")).andReturn(vectorValueSelector_2).anyTimes();
    expect(vectorColumnSelectorFactory.getColumnCapabilities("string_field")).andReturn(new ColumnCapabilitiesImpl().setType(ColumnType.STRING));
    expect(vectorColumnSelectorFactory.getColumnCapabilities("complex_field")).andReturn(new ColumnCapabilitiesImpl().setType(ApproximateHistogramAggregatorFactory.TYPE));
    EasyMock.replay(vectorColumnSelectorFactory);
}
Also used : VectorValueSelector(org.apache.druid.segment.vector.VectorValueSelector) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) VectorColumnSelectorFactory(org.apache.druid.segment.vector.VectorColumnSelectorFactory) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) Before(org.junit.Before)

Aggregations

ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities)78 Test (org.junit.Test)52 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)29 ColumnCapabilitiesImpl (org.apache.druid.segment.column.ColumnCapabilitiesImpl)9 ArrayList (java.util.ArrayList)4 Nullable (javax.annotation.Nullable)4 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)4 DimensionSpec (org.apache.druid.query.dimension.DimensionSpec)4 ColumnValueSelector (org.apache.druid.segment.ColumnValueSelector)4 QueryableIndex (org.apache.druid.segment.QueryableIndex)4 ColumnHolder (org.apache.druid.segment.column.ColumnHolder)4 ColumnType (org.apache.druid.segment.column.ColumnType)4 RowSignature (org.apache.druid.segment.column.RowSignature)4 ValueType (org.apache.druid.segment.column.ValueType)4 List (java.util.List)3 NullHandlingTest (org.apache.druid.common.config.NullHandlingTest)3 ISE (org.apache.druid.java.util.common.ISE)3 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)3 VectorValueSelector (org.apache.druid.segment.vector.VectorValueSelector)3 Before (org.junit.Before)3