Search in sources :

Example 41 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class IncrementalIndexReadBenchmark method readWithFilters.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void readWithFilters(Blackhole blackhole) {
    DimFilter filter = new OrDimFilter(Arrays.asList(new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), new JavaScriptDimFilter("dimSequential", "function(x) { return false }", null, JavaScriptConfig.getEnabledInstance()), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Collections.singletonList("X"), null)));
    IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex);
    Sequence<Cursor> cursors = makeCursors(sa, filter);
    Cursor cursor = cursors.limit(1).toList().get(0);
    List<DimensionSelector> selectors = new ArrayList<>();
    selectors.add(makeDimensionSelector(cursor, "dimSequential"));
    selectors.add(makeDimensionSelector(cursor, "dimZipf"));
    selectors.add(makeDimensionSelector(cursor, "dimUniform"));
    selectors.add(makeDimensionSelector(cursor, "dimSequentialHalfNull"));
    cursor.reset();
    while (!cursor.isDone()) {
        for (DimensionSelector selector : selectors) {
            IndexedInts row = selector.getRow();
            blackhole.consume(selector.lookupName(row.get(0)));
        }
        cursor.advance();
    }
}
Also used : RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) DimensionSelector(org.apache.druid.segment.DimensionSelector) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ContainsSearchQuerySpec(org.apache.druid.query.search.ContainsSearchQuerySpec) ArrayList(java.util.ArrayList) Cursor(org.apache.druid.segment.Cursor) IndexedInts(org.apache.druid.segment.data.IndexedInts) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) JavaScriptDimFilter(org.apache.druid.query.filter.JavaScriptDimFilter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) JavaScriptDimFilter(org.apache.druid.query.filter.JavaScriptDimFilter) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 42 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class IndexedTableJoinableTest method makeJoinMatcherWithDimensionSelectorOnString.

@Test
public void makeJoinMatcherWithDimensionSelectorOnString() {
    final JoinConditionAnalysis condition = JoinConditionAnalysis.forExpression("x == \"j.str\"", PREFIX, ExprMacroTable.nil());
    final JoinMatcher joinMatcher = target.makeJoinMatcher(dummyColumnSelectorFactory, condition, false, false, Closer.create());
    final DimensionSelector selector = joinMatcher.getColumnSelectorFactory().makeDimensionSelector(DefaultDimensionSpec.of("str"));
    // getValueCardinality
    Assert.assertEquals(4, selector.getValueCardinality());
    // nameLookupPossibleInAdvance
    Assert.assertTrue(selector.nameLookupPossibleInAdvance());
    // lookupName
    Assert.assertEquals("foo", selector.lookupName(0));
    Assert.assertEquals("bar", selector.lookupName(1));
    Assert.assertEquals("baz", selector.lookupName(2));
    Assert.assertNull(selector.lookupName(3));
    // lookupId
    Assert.assertNull(selector.idLookup());
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) ConstantDimensionSelector(org.apache.druid.segment.ConstantDimensionSelector) JoinMatcher(org.apache.druid.segment.join.JoinMatcher) JoinConditionAnalysis(org.apache.druid.segment.join.JoinConditionAnalysis) Test(org.junit.Test)

Example 43 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class IndexTaskTest method testNumShardsAndPartitionDimensionsProvided.

@Test
public void testNumShardsAndPartitionDimensionsProvided() throws Exception {
    final File tmpDir = temporaryFolder.newFolder();
    final File tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        writer.write("2014-01-01T00:00:10Z,a,1\n");
        writer.write("2014-01-01T01:00:20Z,b,1\n");
        writer.write("2014-01-01T02:00:30Z,c,1\n");
    }
    final IndexTask indexTask = new IndexTask(null, null, createDefaultIngestionSpec(jsonMapper, tmpDir, null, null, createTuningConfigWithPartitionsSpec(new HashedPartitionsSpec(null, 2, ImmutableList.of("dim")), true), false, false), null);
    final List<DataSegment> segments = runTask(indexTask).rhs;
    Assert.assertEquals(2, segments.size());
    for (DataSegment segment : segments) {
        Assert.assertEquals(DATASOURCE, segment.getDataSource());
        Assert.assertEquals(Intervals.of("2014/P1D"), segment.getInterval());
        Assert.assertEquals(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
        final HashBasedNumberedShardSpec hashBasedNumberedShardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
        Assert.assertEquals(HashPartitionFunction.MURMUR3_32_ABS, hashBasedNumberedShardSpec.getPartitionFunction());
        final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
        final WindowedStorageAdapter adapter = new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)), segment.getInterval());
        final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
        final List<Integer> hashes = cursorSequence.map(cursor -> {
            final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
            final int hash = HashPartitionFunction.MURMUR3_32_ABS.hash(HashBasedNumberedShardSpec.serializeGroupKey(jsonMapper, Collections.singletonList(selector.getObject())), hashBasedNumberedShardSpec.getNumBuckets());
            cursor.advance();
            return hash;
        }).toList();
        Assert.assertTrue(hashes.stream().allMatch(h -> h.intValue() == hashes.get(0)));
    }
}
Also used : HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) Arrays(java.util.Arrays) IndexSpec(org.apache.druid.segment.IndexSpec) Pair(org.apache.druid.java.util.common.Pair) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) IAE(org.apache.druid.java.util.common.IAE) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) NoopSegmentHandoffNotifierFactory(org.apache.druid.segment.realtime.plumber.NoopSegmentHandoffNotifierFactory) EqualsVerifier(nl.jqno.equalsverifier.EqualsVerifier) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) CountDownLatch(java.util.concurrent.CountDownLatch) PartitionIds(org.apache.druid.timeline.partition.PartitionIds) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) RowIngestionMetersFactory(org.apache.druid.segment.incremental.RowIngestionMetersFactory) SegmentLocalCacheManager(org.apache.druid.segment.loading.SegmentLocalCacheManager) SegmentId(org.apache.druid.timeline.SegmentId) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) SegmentLoaderConfig(org.apache.druid.segment.loading.SegmentLoaderConfig) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) BufferedWriter(java.io.BufferedWriter) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) CoreMatchers(org.hamcrest.CoreMatchers) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Event(org.apache.druid.java.util.emitter.core.Event) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Parameterized(org.junit.runners.Parameterized) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) LockGranularity(org.apache.druid.indexing.common.LockGranularity) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) SegmentAllocateAction(org.apache.druid.indexing.common.actions.SegmentAllocateAction) Intervals(org.apache.druid.java.util.common.Intervals) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) Files(com.google.common.io.Files) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) DimensionSelector(org.apache.druid.segment.DimensionSelector) ExpectedException(org.junit.rules.ExpectedException) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) StorageLocationConfig(org.apache.druid.segment.loading.StorageLocationConfig) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) Cursor(org.apache.druid.segment.Cursor) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) DimensionSelector(org.apache.druid.segment.DimensionSelector) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) Cursor(org.apache.druid.segment.Cursor) DataSegment(org.apache.druid.timeline.DataSegment) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BufferedWriter(java.io.BufferedWriter) File(java.io.File) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) Test(org.junit.Test)

Example 44 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class StringGroupByColumnSelectorStrategy method getOnlyValue.

@Override
public Object getOnlyValue(ColumnValueSelector selector) {
    final DimensionSelector dimSelector = (DimensionSelector) selector;
    final IndexedInts row = dimSelector.getRow();
    Preconditions.checkState(row.size() < 2, "Not supported for multi-value dimensions");
    return row.size() == 1 ? row.get(0) : GROUP_BY_MISSING_VALUE;
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) IndexedInts(org.apache.druid.segment.data.IndexedInts)

Example 45 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class DictionaryBuildingStringGroupByColumnSelectorStrategy method initColumnValues.

@Override
public void initColumnValues(ColumnValueSelector selector, int columnIndex, Object[] valuess) {
    final DimensionSelector dimSelector = (DimensionSelector) selector;
    final IndexedInts row = dimSelector.getRow();
    ArrayBasedIndexedInts newRow = (ArrayBasedIndexedInts) valuess[columnIndex];
    if (newRow == null) {
        newRow = new ArrayBasedIndexedInts();
        valuess[columnIndex] = newRow;
    }
    int rowSize = row.size();
    newRow.ensureSize(rowSize);
    for (int i = 0; i < rowSize; i++) {
        final String value = dimSelector.lookupName(row.get(i));
        final int dictId = reverseDictionary.getInt(value);
        if (dictId < 0) {
            dictionary.add(value);
            reverseDictionary.put(value, nextId);
            newRow.setValue(i, nextId);
            nextId++;
        } else {
            newRow.setValue(i, dictId);
        }
    }
    newRow.setSize(rowSize);
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) ArrayBasedIndexedInts(org.apache.druid.segment.data.ArrayBasedIndexedInts) IndexedInts(org.apache.druid.segment.data.IndexedInts) ArrayBasedIndexedInts(org.apache.druid.segment.data.ArrayBasedIndexedInts)

Aggregations

DimensionSelector (org.apache.druid.segment.DimensionSelector)66 Test (org.junit.Test)36 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)31 Cursor (org.apache.druid.segment.Cursor)24 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)24 IndexedInts (org.apache.druid.segment.data.IndexedInts)22 List (java.util.List)14 VirtualColumns (org.apache.druid.segment.VirtualColumns)14 ImmutableList (com.google.common.collect.ImmutableList)12 StorageAdapter (org.apache.druid.segment.StorageAdapter)12 ArrayList (java.util.ArrayList)11 Intervals (org.apache.druid.java.util.common.Intervals)11 Granularities (org.apache.druid.java.util.common.granularity.Granularities)11 Sequence (org.apache.druid.java.util.common.guava.Sequence)11 QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter)11 NullHandling (org.apache.druid.common.config.NullHandling)10 ColumnSelectorFactory (org.apache.druid.segment.ColumnSelectorFactory)9 DataSegment (org.apache.druid.timeline.DataSegment)9 ConstantDimensionSelector (org.apache.druid.segment.ConstantDimensionSelector)8 ValueMatcher (org.apache.druid.query.filter.ValueMatcher)7