Search in sources:

Example 11 with ColumnHolder

Use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.

The class CompactionTask, method createDimensionsSpec.

/**
 * Builds a {@link DimensionsSpec} containing one schema per distinct dimension found in the given segments.
 * <p>
 * The order of dimensions is retained as they were specified, since that order may be optimized for performance.
 * Dimensions are collected from the most recent segment to the oldest one, because recent segments are likely to be
 * queried more frequently and the performance should therefore favor them over older ones.
 * <p>
 * Note that the given list is sorted in place by segment interval.
 *
 * @param queryableIndices pairs of a loaded index (lhs) and the segment it belongs to (rhs)
 * @return the dimensions spec, with each dimension appearing once, in first-seen order
 */
private static DimensionsSpec createDimensionsSpec(List<NonnullPair<QueryableIndex, DataSegment>> queryableIndices) {
    // Names of the dimensions in the order they were first seen, and the schema created for each of them.
    final List<String> orderedDimensionNames = new ArrayList<>();
    final Map<String, DimensionSchema> schemaByDimension = new HashMap<>();
    // Sort the segments in order of interval, see https://github.com/apache/druid/pull/9905
    queryableIndices.sort((o1, o2) -> Comparators.intervalsByStartThenEnd().compare(o1.rhs.getInterval(), o2.rhs.getInterval()));
    // Walk the segments from the most recent one to the oldest one.
    for (NonnullPair<QueryableIndex, DataSegment> indexAndSegment : Lists.reverse(queryableIndices)) {
        final QueryableIndex segmentIndex = indexAndSegment.lhs;
        final Map<String, DimensionHandler> handlersByDimension = segmentIndex.getDimensionHandlers();
        for (String dimension : segmentIndex.getAvailableDimensions()) {
            // The column is verified for every dimension, including those already collected from a newer segment.
            final ColumnHolder holder = Preconditions.checkNotNull(segmentIndex.getColumnHolder(dimension), "Cannot find column for dimension[%s]", dimension);
            if (schemaByDimension.containsKey(dimension)) {
                continue;
            }
            final DimensionHandler handler = Preconditions.checkNotNull(handlersByDimension.get(dimension), "Cannot find dimensionHandler for dimension[%s]", dimension);
            orderedDimensionNames.add(dimension);
            schemaByDimension.put(dimension, createDimensionSchema(dimension, holder.getCapabilities(), handler.getMultivalueHandling()));
        }
    }
    final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
    for (String dimName : orderedDimensionNames) {
        dimensionSchemas.add(Preconditions.checkNotNull(schemaByDimension.get(dimName), "Cannot find dimension[%s] from dimensionSchemaMap", dimName));
    }
    return new DimensionsSpec(dimensionSchemas);
}
Also used : Verify(org.apache.curator.shaded.com.google.common.base.Verify) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) Comparators(org.apache.druid.java.util.common.guava.Comparators) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) IAE(org.apache.druid.java.util.common.IAE) MultiValueHandling(org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling) BiMap(com.google.common.collect.BiMap) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) Property(org.apache.druid.indexer.Property) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) Segments(org.apache.druid.indexing.overlord.Segments) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) LockGranularity(org.apache.druid.indexing.common.LockGranularity) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) 
UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) Intervals(org.apache.druid.java.util.common.Intervals) Duration(org.joda.time.Duration) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) HashMap(java.util.HashMap) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) TaskStatus(org.apache.druid.indexer.TaskStatus) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) CompactSegments(org.apache.druid.server.coordinator.duty.CompactSegments) DruidInputSource(org.apache.druid.indexing.input.DruidInputSource) Nonnull(javax.annotation.Nonnull) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) 
ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) RE(org.apache.druid.java.util.common.RE) NonnullPair(org.apache.druid.java.util.common.NonnullPair) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Include(com.fasterxml.jackson.annotation.JsonInclude.Include) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IOException(java.io.IOException) ClientCompactionTaskQuery(org.apache.druid.client.indexing.ClientCompactionTaskQuery) File(java.io.File) HashBiMap(com.google.common.collect.HashBiMap) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) DimensionHandler(org.apache.druid.segment.DimensionHandler) TreeMap(java.util.TreeMap) Checks(org.apache.druid.indexer.Checks) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) AppendableIndexSpec(org.apache.druid.segment.incremental.AppendableIndexSpec) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) IndexIO(org.apache.druid.segment.IndexIO) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) 
DimensionHandler(org.apache.druid.segment.DimensionHandler) HashMap(java.util.HashMap) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DataSegment(org.apache.druid.timeline.DataSegment) QueryableIndex(org.apache.druid.segment.QueryableIndex) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec)

Example 12 with ColumnHolder

Use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.

The class ColumnarLongsEncodeDataFromSegmentBenchmark, method initializeSegmentValueIntermediaryFile.

/**
 * Writes the values of the column {@code columnName} to an intermediary text file, one value per line. Encoders read
 * from this file as input to write encoded column files.
 * <p>
 * Nothing is done when the data file already exists. The column is validated before the file is created, so that a
 * missing or non-long column cannot leave behind an empty file which a later call would treat as already
 * initialized.
 *
 * @throws IOException      if loading the segment or writing the data file fails
 * @throws RuntimeException if the column cannot be found in the segment or is not of type LONG
 */
private void initializeSegmentValueIntermediaryFile() throws IOException {
    final File dataFile = new File(getTmpDir(), getColumnDataFileName(segmentName, columnName));
    if (dataFile.exists()) {
        return;
    }
    final IndexIO indexIO = new IndexIO(new DefaultObjectMapper(), () -> 0);
    try (final QueryableIndex index = indexIO.loadIndex(new File(segmentPath))) {
        final ColumnHolder column = index.getColumnHolder(columnName);
        if (column == null) {
            throw new RuntimeException("Cannot find column[" + columnName + "] in segment[" + segmentPath + "]");
        }
        // Validate the type before the writer is opened: opening the writer creates the data file.
        if (!column.getCapabilities().is(ValueType.LONG)) {
            throw new RuntimeException("Invalid column type, expected 'Long'");
        }
        // The column is closed together with the writer once all values are written.
        try (LongsColumn theColumn = (LongsColumn) column.getColumn();
             Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
            final int numRows = theColumn.length();
            for (int i = 0; i < numRows; i++) {
                writer.write(Long.toString(theColumn.getLongSingleValueRow(i)));
                writer.write('\n');
            }
        }
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) LongsColumn(org.apache.druid.segment.column.LongsColumn) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) IndexIO(org.apache.druid.segment.IndexIO) QueryableIndex(org.apache.druid.segment.QueryableIndex) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) File(java.io.File) Writer(java.io.Writer)

Example 13 with ColumnHolder

Use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.

The class SegmentAnalyzer, method analyze.

/**
 * Produces one {@link ColumnAnalysis} for every column in the row signature of the given segment.
 *
 * @param segment the segment to analyze, must not be null
 * @return the analyses keyed by column name, held in a {@link TreeMap}
 */
public Map<String, ColumnAnalysis> analyze(Segment segment) {
    Preconditions.checkNotNull(segment, "segment");
    // The queryable index is null for incremental-index-based segments, but the storage adapter is always available.
    final QueryableIndex queryableIndex = segment.asQueryableIndex();
    final StorageAdapter storageAdapter = segment.asStorageAdapter();
    // The row count and the column names both come from the storage adapter.
    final int numRows = storageAdapter.getNumRows();
    final Map<String, ColumnAnalysis> analysisByColumn = new TreeMap<>();
    final RowSignature signature = storageAdapter.getRowSignature();
    for (String columnName : signature.getColumnNames()) {
        // See javadocs for getSnapshotColumnCapabilities for a discussion of why incremental indexes are special-cased.
        final ColumnCapabilities columnCapabilities = storageAdapter instanceof IncrementalIndexStorageAdapter
            ? ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName)
            : storageAdapter.getColumnCapabilities(columnName);
        final ColumnAnalysis columnAnalysis;
        switch (columnCapabilities.getType()) {
            case LONG:
                // The time column uses its own per-row size constant; every other long column uses Long.BYTES.
                columnAnalysis = analyzeNumericColumn(
                    columnCapabilities,
                    numRows,
                    ColumnHolder.TIME_COLUMN_NAME.equals(columnName) ? NUM_BYTES_IN_TIMESTAMP : Long.BYTES
                );
                break;
            case FLOAT:
                columnAnalysis = analyzeNumericColumn(columnCapabilities, numRows, NUM_BYTES_IN_TEXT_FLOAT);
                break;
            case DOUBLE:
                columnAnalysis = analyzeNumericColumn(columnCapabilities, numRows, Double.BYTES);
                break;
            case STRING:
                if (queryableIndex == null) {
                    columnAnalysis = analyzeStringColumn(columnCapabilities, storageAdapter, columnName);
                } else {
                    columnAnalysis = analyzeStringColumn(columnCapabilities, queryableIndex.getColumnHolder(columnName));
                }
                break;
            case COMPLEX:
                // Without a queryable index there is no column holder to hand over.
                columnAnalysis = analyzeComplexColumn(
                    columnCapabilities,
                    queryableIndex == null ? null : queryableIndex.getColumnHolder(columnName)
                );
                break;
            default:
                log.warn("Unknown column type[%s].", columnCapabilities.asTypeString());
                columnAnalysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", columnCapabilities.asTypeString()));
        }
        analysisByColumn.put(columnName, columnAnalysis);
    }
    return analysisByColumn;
}
Also used : ColumnHolder(org.apache.druid.segment.column.ColumnHolder) QueryableIndex(org.apache.druid.segment.QueryableIndex) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) StorageAdapter(org.apache.druid.segment.StorageAdapter) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) TreeMap(java.util.TreeMap) RowSignature(org.apache.druid.segment.column.RowSignature) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)

Example 14 with ColumnHolder

Use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.

The class UseIndexesStrategy, method makeTimeFilteredBitmap.

/**
 * Computes the bitmap of rows selected by the given filter, restricted to the given interval.
 *
 * @param index    the index providing the bitmap factory and the time column
 * @param segment  the segment whose data interval is compared against {@code interval}
 * @param filter   the filter to apply, or null for no filter; it must support bitmap indexes
 * @param interval the interval to restrict the rows to
 * @return the filter bitmap when the interval contains the whole segment data interval (null if there is no filter
 *         either); otherwise the bitmap of rows in the interval, intersected with the filter bitmap if present
 */
static ImmutableBitmap makeTimeFilteredBitmap(final QueryableIndex index, final Segment segment, final Filter filter, final Interval interval) {
    final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions();
    ImmutableBitmap filterBitmap = null;
    if (filter != null) {
        final BitmapIndexSelector indexSelector = new ColumnSelectorBitmapIndexSelector(index.getBitmapFactoryForDimensions(), VirtualColumns.EMPTY, index);
        Preconditions.checkArgument(filter.supportsBitmapIndex(indexSelector), "filter[%s] should support bitmap", filter);
        filterBitmap = filter.getBitmapIndex(indexSelector);
    }
    // No time restriction is needed when the interval covers all of the segment's data.
    if (interval.contains(segment.getDataInterval())) {
        return filterBitmap;
    }
    final MutableBitmap rowsInInterval = bitmapFactory.makeEmptyMutableBitmap();
    final ColumnHolder timeHolder = index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME);
    try (final NumericColumn timestamps = (NumericColumn) timeHolder.getColumn()) {
        // Clamp the row range to the valid row indexes of the time column.
        final int firstRow = Math.max(0, getStartIndexOfTime(timestamps, interval.getStartMillis(), true));
        final int lastRow = Math.min(timestamps.length() - 1, getStartIndexOfTime(timestamps, interval.getEndMillis(), false));
        for (int row = firstRow; row <= lastRow; row++) {
            rowsInInterval.add(row);
        }
        final ImmutableBitmap timeBitmap = bitmapFactory.makeImmutableBitmap(rowsInInterval);
        if (filterBitmap == null) {
            return timeBitmap;
        }
        return timeBitmap.intersection(filterBitmap);
    }
}
Also used : ColumnSelectorBitmapIndexSelector(org.apache.druid.segment.ColumnSelectorBitmapIndexSelector) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) NumericColumn(org.apache.druid.segment.column.NumericColumn) ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) MutableBitmap(org.apache.druid.collections.bitmap.MutableBitmap) ColumnSelectorBitmapIndexSelector(org.apache.druid.segment.ColumnSelectorBitmapIndexSelector) BitmapIndexSelector(org.apache.druid.query.filter.BitmapIndexSelector) BitmapFactory(org.apache.druid.collections.bitmap.BitmapFactory)

Example 15 with ColumnHolder

Use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.

The class ListFilteredVirtualColumn, method getBitmapIndex.

/**
 * Wraps the bitmap index of the delegate dimension's column in a {@link ListFilteredBitmapIndex} that applies the
 * allow-list or deny-list id mapping built from {@code values}.
 *
 * @param columnName not used by this implementation
 * @param selector   used to look up the column holder of the delegate dimension
 * @return the filtered bitmap index, or null when the column or its bitmap index is not available
 */
@Override
@Nullable
public BitmapIndex getBitmapIndex(String columnName, ColumnSelector selector) {
    final ColumnHolder delegateHolder = selector.getColumnHolder(delegate.getDimension());
    final BitmapIndex delegateIndex = delegateHolder == null ? null : delegateHolder.getBitmapIndex();
    if (delegateIndex == null) {
        return null;
    }
    final int cardinality = delegateIndex.getCardinality();
    final IdMapping mapping = allowList
        ? ListFilteredDimensionSpec.buildAllowListIdMapping(values, cardinality, null, delegateIndex::getValue)
        : ListFilteredDimensionSpec.buildDenyListIdMapping(values, cardinality, delegateIndex::getValue);
    return new ListFilteredBitmapIndex(delegateIndex, mapping);
}
Also used : ColumnHolder(org.apache.druid.segment.column.ColumnHolder) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) IdMapping(org.apache.druid.segment.IdMapping) Nullable(javax.annotation.Nullable)

Aggregations

ColumnHolder (org.apache.druid.segment.column.ColumnHolder) 23, BitmapIndex (org.apache.druid.segment.column.BitmapIndex) 10, Nullable (javax.annotation.Nullable) 7, ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities) 7, ImmutableBitmap (org.apache.druid.collections.bitmap.ImmutableBitmap) 6, DictionaryEncodedColumn (org.apache.druid.segment.column.DictionaryEncodedColumn) 6, BitmapFactory (org.apache.druid.collections.bitmap.BitmapFactory) 5, File (java.io.File) 4, HashMap (java.util.HashMap) 4, Map (java.util.Map) 4, ISE (org.apache.druid.java.util.common.ISE) 4, QueryableIndex (org.apache.druid.segment.QueryableIndex) 4, BaseColumn (org.apache.druid.segment.column.BaseColumn) 4, Test (org.junit.Test) 4, RoaringBitmapFactory (org.apache.druid.collections.bitmap.RoaringBitmapFactory) 3, RuntimeShapeInspector (org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) 3, VisibleForTesting (com.google.common.annotations.VisibleForTesting) 2, ImmutableList (com.google.common.collect.ImmutableList) 2, IOException (java.io.IOException) 2, ArrayList (java.util.ArrayList) 2