Search in sources :

Example 1 with DimensionHandler

use of org.apache.druid.segment.DimensionHandler in project druid by druid-io.

the class CompactionTask method createDimensionsSpec.

/**
 * Builds a {@link DimensionsSpec} whose dimensions are the union of the dimensions of the given
 * segments, in a single consistent order.
 *
 * <p>The order of dimensions is retained as they were specified since it may be optimized for
 * performance. Dimensions are extracted from the most recent segments back to older ones, because
 * recent segments are likely to be queried more frequently, so the resulting order should favor
 * them. See https://github.com/apache/druid/pull/9905 for the interval-based sort.
 *
 * @param queryableIndices pairs of a segment's {@link QueryableIndex} and its {@link DataSegment};
 *                         the list is sorted in place by segment interval
 * @return a spec listing every dimension seen across the segments, most-recent-first order
 */
private static DimensionsSpec createDimensionsSpec(List<NonnullPair<QueryableIndex, DataSegment>> queryableIndices) {
    final BiMap<String, Integer> uniqueDims = HashBiMap.create();
    final Map<String, DimensionSchema> dimensionSchemaMap = new HashMap<>();
    // Sort by interval (start, then end) so that reversing the list below visits the most recent
    // segments first; the first segment to mention a dimension decides its position.
    queryableIndices.sort((o1, o2) -> Comparators.intervalsByStartThenEnd().compare(o1.rhs.getInterval(), o2.rhs.getInterval()));
    int index = 0;
    for (NonnullPair<QueryableIndex, DataSegment> pair : Lists.reverse(queryableIndices)) {
        final QueryableIndex queryableIndex = pair.lhs;
        final Map<String, DimensionHandler> dimensionHandlerMap = queryableIndex.getDimensionHandlers();
        for (String dimension : queryableIndex.getAvailableDimensions()) {
            if (!uniqueDims.containsKey(dimension)) {
                // Resolve the column holder and handler only for dimensions seen for the first
                // time; performing the checked lookups for already-registered dimensions is
                // redundant work whose result was never used.
                final ColumnHolder columnHolder = Preconditions.checkNotNull(
                    queryableIndex.getColumnHolder(dimension),
                    "Cannot find column for dimension[%s]",
                    dimension
                );
                final DimensionHandler dimensionHandler = Preconditions.checkNotNull(
                    dimensionHandlerMap.get(dimension),
                    "Cannot find dimensionHandler for dimension[%s]",
                    dimension
                );
                uniqueDims.put(dimension, index++);
                dimensionSchemaMap.put(
                    dimension,
                    createDimensionSchema(dimension, columnHolder.getCapabilities(), dimensionHandler.getMultivalueHandling())
                );
            }
        }
    }
    // Re-emit the schemas in registration order; every index in [0, size) was assigned above, so
    // each lookup must succeed.
    final BiMap<Integer, String> orderedDims = uniqueDims.inverse();
    final List<DimensionSchema> dimensionSchemas = IntStream.range(0, orderedDims.size()).mapToObj(i -> {
        final String dimName = orderedDims.get(i);
        return Preconditions.checkNotNull(dimensionSchemaMap.get(dimName), "Cannot find dimension[%s] from dimensionSchemaMap", dimName);
    }).collect(Collectors.toList());
    return new DimensionsSpec(dimensionSchemas);
}
Also used : Verify(org.apache.curator.shaded.com.google.common.base.Verify) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) Comparators(org.apache.druid.java.util.common.guava.Comparators) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) IAE(org.apache.druid.java.util.common.IAE) MultiValueHandling(org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling) BiMap(com.google.common.collect.BiMap) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) Property(org.apache.druid.indexer.Property) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) Segments(org.apache.druid.indexing.overlord.Segments) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) LockGranularity(org.apache.druid.indexing.common.LockGranularity) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) 
UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) Intervals(org.apache.druid.java.util.common.Intervals) Duration(org.joda.time.Duration) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) HashMap(java.util.HashMap) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) TaskStatus(org.apache.druid.indexer.TaskStatus) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) CompactSegments(org.apache.druid.server.coordinator.duty.CompactSegments) DruidInputSource(org.apache.druid.indexing.input.DruidInputSource) Nonnull(javax.annotation.Nonnull) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) 
ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) RE(org.apache.druid.java.util.common.RE) NonnullPair(org.apache.druid.java.util.common.NonnullPair) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Include(com.fasterxml.jackson.annotation.JsonInclude.Include) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IOException(java.io.IOException) ClientCompactionTaskQuery(org.apache.druid.client.indexing.ClientCompactionTaskQuery) File(java.io.File) HashBiMap(com.google.common.collect.HashBiMap) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) DimensionHandler(org.apache.druid.segment.DimensionHandler) TreeMap(java.util.TreeMap) Checks(org.apache.druid.indexer.Checks) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) AppendableIndexSpec(org.apache.druid.segment.incremental.AppendableIndexSpec) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) IndexIO(org.apache.druid.segment.IndexIO) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) 
DimensionHandler(org.apache.druid.segment.DimensionHandler) HashMap(java.util.HashMap) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DataSegment(org.apache.druid.timeline.DataSegment) QueryableIndex(org.apache.druid.segment.QueryableIndex) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec)

Example 2 with DimensionHandler

use of org.apache.druid.segment.DimensionHandler in project druid by druid-io.

the class IncrementalIndex method loadDimensionIterable.

/**
 * Currently called to initialize IncrementalIndex dimension order during index creation
 * Index dimension ordering could be changed to initialize from DimensionsSpec after resolution of
 * https://github.com/apache/druid/issues/2011
 */
/**
 * Initializes this index's dimension ordering from a previously persisted order during index
 * creation. The ordering could instead be initialized from DimensionsSpec once
 * https://github.com/apache/druid/issues/2011 is resolved.
 *
 * @param oldDimensionOrder     dimension names in the order they should be registered
 * @param oldColumnCapabilities capabilities of each dimension from the old index
 */
public void loadDimensionIterable(Iterable<String> oldDimensionOrder, Map<String, ColumnCapabilities> oldColumnCapabilities) {
    synchronized (dimensionDescs) {
        // The order may only be loaded into a pristine index.
        if (!dimensionDescs.isEmpty()) {
            throw new ISE("Cannot load dimension order when existing order[%s] is not empty.", dimensionDescs.keySet());
        }
        for (String dim : oldDimensionOrder) {
            if (dimensionDescs.get(dim) != null) {
                // Already registered on an earlier pass of this loop; skip duplicates.
                continue;
            }
            final ColumnCapabilitiesImpl mergedCapabilities =
                ColumnCapabilitiesImpl.snapshot(oldColumnCapabilities.get(dim), IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC);
            final DimensionHandler dimHandler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, mergedCapabilities, null);
            addNewDimension(dim, dimHandler);
        }
    }
}
Also used : DimensionHandler(org.apache.druid.segment.DimensionHandler) ISE(org.apache.druid.java.util.common.ISE) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl)

Example 3 with DimensionHandler

use of org.apache.druid.segment.DimensionHandler in project druid by druid-io.

the class IncrementalIndexRowIterator method makeRowPointer.

/**
 * Creates a {@link RowPointer} over the given incremental index, whose dimension selectors emit
 * values converted from unsorted to sorted encoding.
 *
 * @param incrementalIndex index whose dimensions and metrics are read
 * @param rowHolder        holder the returned pointer reads the current row from
 * @param rowNumCounter    counter supplying row numbers to the pointer
 */
private static RowPointer makeRowPointer(IncrementalIndex incrementalIndex, IncrementalIndexRowHolder rowHolder, RowNumCounter rowNumCounter) {
    final ColumnSelectorFactory selectorFactory = new IncrementalIndexColumnSelectorFactory(
        new IncrementalIndexStorageAdapter(incrementalIndex),
        VirtualColumns.EMPTY,
        false,
        rowHolder
    );
    // Wrap each raw dimension selector so that it yields sorted encoded values.
    final ColumnValueSelector[] dimSelectors = incrementalIndex
        .getDimensions()
        .stream()
        .map(desc -> desc.getIndexer().convertUnsortedValuesToSorted(selectorFactory.makeColumnValueSelector(desc.getName())))
        .toArray(ColumnValueSelector[]::new);
    final List<DimensionHandler> handlers = incrementalIndex
        .getDimensions()
        .stream()
        .map(IncrementalIndex.DimensionDesc::getHandler)
        .collect(Collectors.toList());
    final ColumnValueSelector[] metricSelectors = incrementalIndex
        .getMetricNames()
        .stream()
        .map(selectorFactory::makeColumnValueSelector)
        .toArray(ColumnValueSelector[]::new);
    return new RowPointer(rowHolder, dimSelectors, handlers, metricSelectors, incrementalIndex.getMetricNames(), rowNumCounter);
}
Also used : VirtualColumns(org.apache.druid.segment.VirtualColumns) Iterator(java.util.Iterator) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) IndexableAdapter(org.apache.druid.segment.IndexableAdapter) TimeAndDimsPointer(org.apache.druid.segment.TimeAndDimsPointer) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) List(java.util.List) DimensionHandler(org.apache.druid.segment.DimensionHandler) RowPointer(org.apache.druid.segment.RowPointer) RowNumCounter(org.apache.druid.segment.RowNumCounter) TransformableRowIterator(org.apache.druid.segment.TransformableRowIterator) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) DimensionHandler(org.apache.druid.segment.DimensionHandler) RowPointer(org.apache.druid.segment.RowPointer) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector)

Example 4 with DimensionHandler

use of org.apache.druid.segment.DimensionHandler in project druid by druid-io.

the class OnheapIncrementalIndex method iterableWithPostAggregations.

@Override
public Iterable<Row> iterableWithPostAggregations(@Nullable final List<PostAggregator> postAggs, final boolean descending) {
    final AggregatorFactory[] metrics = getMetricAggs();
    {
        return () -> {
            final List<DimensionDesc> dimensions = getDimensions();
            return Iterators.transform(getFacts().iterator(descending), incrementalIndexRow -> {
                final int rowOffset = incrementalIndexRow.getRowIndex();
                Object[] theDims = incrementalIndexRow.getDims();
                Map<String, Object> theVals = Maps.newLinkedHashMap();
                for (int i = 0; i < theDims.length; ++i) {
                    Object dim = theDims[i];
                    DimensionDesc dimensionDesc = dimensions.get(i);
                    if (dimensionDesc == null) {
                        continue;
                    }
                    String dimensionName = dimensionDesc.getName();
                    DimensionHandler handler = dimensionDesc.getHandler();
                    if (dim == null || handler.getLengthOfEncodedKeyComponent(dim) == 0) {
                        theVals.put(dimensionName, null);
                        continue;
                    }
                    final DimensionIndexer indexer = dimensionDesc.getIndexer();
                    Object rowVals = indexer.convertUnsortedEncodedKeyComponentToActualList(dim);
                    theVals.put(dimensionName, rowVals);
                }
                Aggregator[] aggs = getAggsForRow(rowOffset);
                for (int i = 0; i < aggs.length; ++i) {
                    theVals.put(metrics[i].getName(), aggs[i].get());
                }
                if (postAggs != null) {
                    for (PostAggregator postAgg : postAggs) {
                        theVals.put(postAgg.getName(), postAgg.compute(theVals));
                    }
                }
                return new MapBasedRow(incrementalIndexRow.getTimestamp(), theVals);
            });
        };
    }
}
Also used : DimensionHandler(org.apache.druid.segment.DimensionHandler) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) Aggregator(org.apache.druid.query.aggregation.Aggregator) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) MapBasedRow(org.apache.druid.data.input.MapBasedRow) DimensionIndexer(org.apache.druid.segment.DimensionIndexer)

Aggregations

DimensionHandler (org.apache.druid.segment.DimensionHandler)3 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 JacksonInject (com.fasterxml.jackson.annotation.JacksonInject)1 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)1 JsonIgnore (com.fasterxml.jackson.annotation.JsonIgnore)1 JsonInclude (com.fasterxml.jackson.annotation.JsonInclude)1 Include (com.fasterxml.jackson.annotation.JsonInclude.Include)1 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions (com.google.common.base.Preconditions)1 BiMap (com.google.common.collect.BiMap)1 HashBiMap (com.google.common.collect.HashBiMap)1 ImmutableList (com.google.common.collect.ImmutableList)1 Lists (com.google.common.collect.Lists)1 File (java.io.File)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1