Search in sources :

Example 41 with IndexedInts

use of org.apache.druid.segment.data.IndexedInts in project druid by druid-io.

the class IncrementalIndexStorageAdapterTest method testCursoringAndSnapshot.

/**
 * Exercises a cursor over an incremental index while further rows are appended to that index after the
 * cursor has been created. The assertions require that the cursor visits exactly two rows and that every
 * id read for dimension "billy" stays below the cardinality captured before any extra rows were added.
 */
@Test
public void testCursoringAndSnapshot() throws Exception {
    final IncrementalIndex index = indexCreator.createIndex();
    final long timestamp = System.currentTimeMillis();
    // Seed the index with two "billy" rows ("v00" and "v01") before any cursor exists.
    int seedRow = 0;
    while (seedRow < 2) {
        index.add(new MapBasedInputRow(timestamp, Collections.singletonList("billy"), ImmutableMap.of("billy", "v0" + seedRow)));
        seedRow++;
    }
    final StorageAdapter sa = new IncrementalIndexStorageAdapter(index);
    Sequence<Cursor> cursors = sa.makeCursors(null, Intervals.utc(timestamp - 60_000, timestamp + 60_000), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    // Counts how many cursors the lambda below actually ran for; checked at the very end.
    final AtomicInteger assertCursorsNotEmpty = new AtomicInteger(0);
    cursors.map(cursor -> {
        // Selector A and the cardinality are captured before any additional row is added.
        DimensionSelector billySelectorA = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("billy", "billy"));
        int initialCardinality = billySelectorA.getValueCardinality();
        // First extra row: a new value for the existing dimension.
        try {
            index.add(new MapBasedInputRow(timestamp, Collections.singletonList("billy"), ImmutableMap.of("billy", "v1")));
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
        DimensionSelector billySelectorB = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("billy", "billy"));
        // Second batch: another "billy" value plus a brand-new dimension "billy2".
        try {
            index.add(new MapBasedInputRow(timestamp, Collections.singletonList("billy"), ImmutableMap.of("billy", "v2")));
            index.add(new MapBasedInputRow(timestamp, Collections.singletonList("billy2"), ImmutableMap.of("billy2", "v3")));
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
        DimensionSelector billySelectorC = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("billy", "billy"));
        DimensionSelector billy2Selector = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("billy2", "billy2"));
        // Third batch: another "billy" value plus a new dimension "billy3" holding an empty string.
        try {
            index.add(new MapBasedInputRow(timestamp, Collections.singletonList("billy"), ImmutableMap.of("billy", "v3")));
            index.add(new MapBasedInputRow(timestamp, Collections.singletonList("billy3"), ImmutableMap.of("billy3", "")));
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
        DimensionSelector billy3Selector = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("billy3", "billy3"));
        // Only now is the cursor walked; every selector is read on every row.
        int visitedRows = 0;
        for (; !cursor.isDone(); visitedRows++) {
            // All three "billy" selectors must only yield ids below the initially captured cardinality.
            IndexedInts billyRowA = billySelectorA.getRow();
            billyRowA.forEach(id -> Assert.assertTrue(id < initialCardinality));
            IndexedInts billyRowB = billySelectorB.getRow();
            billyRowB.forEach(id -> Assert.assertTrue(id < initialCardinality));
            IndexedInts billyRowC = billySelectorC.getRow();
            billyRowC.forEach(id -> Assert.assertTrue(id < initialCardinality));
            // no null id, so should get empty dims array
            IndexedInts billy2Row = billy2Selector.getRow();
            Assert.assertEquals(0, billy2Row.size());
            IndexedInts billy3Row = billy3Selector.getRow();
            if (!NullHandling.replaceWithDefault()) {
                Assert.assertEquals(0, billy3Row.size());
            } else {
                Assert.assertEquals(1, billy3Row.size());
                // the null id
                Assert.assertEquals(0, billy3Row.get(0));
            }
            cursor.advance();
        }
        // Exactly two rows are expected to be visited by the cursor.
        Assert.assertEquals(2, visitedRows);
        assertCursorsNotEmpty.incrementAndGet();
        return null;
    }).toList();
    Assert.assertEquals(1, assertCursorsNotEmpty.get());
}
Also used : GroupByQueryEngine(org.apache.druid.query.groupby.GroupByQueryEngine) Arrays(java.util.Arrays) MapBasedRow(org.apache.druid.data.input.MapBasedRow) IndexedInts(org.apache.druid.segment.data.IndexedInts) StorageAdapter(org.apache.druid.segment.StorageAdapter) ByteBuffer(java.nio.ByteBuffer) Row(org.apache.druid.data.input.Row) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Parameterized(org.junit.runners.Parameterized) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) TopNResultValue(org.apache.druid.query.topn.TopNResultValue) SelectorFilter(org.apache.druid.segment.filter.SelectorFilter) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) JavaScriptAggregatorFactory(org.apache.druid.query.aggregation.JavaScriptAggregatorFactory) Set(java.util.Set) List(java.util.List) CloseableStupidPool(org.apache.druid.collections.CloseableStupidPool) Predicate(com.google.common.base.Predicate) TopNQueryEngine(org.apache.druid.query.topn.TopNQueryEngine) BitmapIndexSelector(org.apache.druid.query.filter.BitmapIndexSelector) Iterables(com.google.common.collect.Iterables) Intervals(org.apache.druid.java.util.common.Intervals) DruidDoublePredicate(org.apache.druid.query.filter.DruidDoublePredicate) DimFilters(org.apache.druid.query.filter.DimFilters) RunWith(org.junit.runner.RunWith) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) JavaScriptConfig(org.apache.druid.js.JavaScriptConfig) DruidLongPredicate(org.apache.druid.query.filter.DruidLongPredicate) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) Predicates(com.google.common.base.Predicates) Suppliers(com.google.common.base.Suppliers) 
GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) DimensionSelector(org.apache.druid.segment.DimensionSelector) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DruidFloatPredicate(org.apache.druid.query.filter.DruidFloatPredicate) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) BitmapResultFactory(org.apache.druid.query.BitmapResultFactory) VirtualColumns(org.apache.druid.segment.VirtualColumns) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) DateTime(org.joda.time.DateTime) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IOException(java.io.IOException) ColumnSelector(org.apache.druid.segment.ColumnSelector) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Result(org.apache.druid.query.Result) Rule(org.junit.Rule) Cursor(org.apache.druid.segment.Cursor) NullHandling(org.apache.druid.common.config.NullHandling) DruidPredicateFactory(org.apache.druid.query.filter.DruidPredicateFactory) CloserRule(org.apache.druid.segment.CloserRule) Assert(org.junit.Assert) Filters(org.apache.druid.segment.filter.Filters) Collections(java.util.Collections) Filter(org.apache.druid.query.filter.Filter) DimensionSelector(org.apache.druid.segment.DimensionSelector) StorageAdapter(org.apache.druid.segment.StorageAdapter) Cursor(org.apache.druid.segment.Cursor) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IndexedInts(org.apache.druid.segment.data.IndexedInts) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) 
Test(org.junit.Test)

Example 42 with IndexedInts

use of org.apache.druid.segment.data.IndexedInts in project druid by druid-io.

the class ForwardingFilteredDimensionSelector method makeValueMatcher.

/**
 * Builds a matcher that tests the current row of {@code selector} against {@code predicate}.
 *
 * The set of matching ids and the predicate's answer for {@code null} are computed once, up front.
 * On each call to {@code matches()}, ids whose forwarded id is negative are skipped; the row matches as
 * soon as one forwarded id is in the precomputed set. A row with no non-negative forwarded ids matches
 * only if the predicate accepts {@code null}.
 */
@Override
public ValueMatcher makeValueMatcher(Predicate<String> predicate) {
    final BitSet matchingIds = DimensionSelectorUtils.makePredicateMatchingSet(this, predicate);
    final boolean acceptsNull = predicate.apply(null);
    return new ValueMatcher() {

        @Override
        public boolean matches() {
            final IndexedInts row = selector.getRow();
            boolean sawForwardedValue = false;
            for (int pos = 0, rowSize = row.size(); pos < rowSize; pos++) {
                final int forwardedId = idMapping.getForwardedId(row.get(pos));
                if (forwardedId < 0) {
                    // Not forwarded: this value does not take part in matching.
                    continue;
                }
                if (matchingIds.get(forwardedId)) {
                    return true;
                }
                sawForwardedValue = true;
            }
            // null should match empty rows in multi-value columns
            return !sawForwardedValue && acceptsNull;
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
            inspector.visit("selector", selector);
        }
    };
}
Also used : ValueMatcher(org.apache.druid.query.filter.ValueMatcher) BooleanValueMatcher(org.apache.druid.segment.filter.BooleanValueMatcher) IndexedInts(org.apache.druid.segment.data.IndexedInts) ArrayBasedIndexedInts(org.apache.druid.segment.data.ArrayBasedIndexedInts) BitSet(java.util.BitSet) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector)

Example 43 with IndexedInts

use of org.apache.druid.segment.data.IndexedInts in project druid by druid-io.

the class PredicateFilteredDimensionSelector method makeValueMatcher.

/**
 * Builds a matcher that tests the current row of {@code selector} against a single {@code value}.
 *
 * Only row values accepted by this selector's {@code predicate} are considered. The row matches as soon as
 * one accepted value equals {@code value}. If no value in the row is accepted by the predicate, the row
 * matches only when {@code value} is {@code null}.
 */
@Override
public ValueMatcher makeValueMatcher(final String value) {
    return new ValueMatcher() {

        @Override
        public boolean matches() {
            final IndexedInts row = selector.getRow();
            boolean sawAcceptedValue = false;
            for (int pos = 0, rowSize = row.size(); pos < rowSize; pos++) {
                final String candidate = lookupName(row.get(pos));
                if (!predicate.apply(candidate)) {
                    // Filtered out by the predicate: behaves as if the value were absent from the row.
                    continue;
                }
                if (Objects.equals(candidate, value)) {
                    return true;
                }
                sawAcceptedValue = true;
            }
            // null should match empty rows in multi-value columns
            return value == null && !sawAcceptedValue;
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
            // PredicateFilteredDimensionSelector.this inspects selector and predicate as well.
            inspector.visit("selector", PredicateFilteredDimensionSelector.this);
        }
    };
}
Also used : ValueMatcher(org.apache.druid.query.filter.ValueMatcher) ArrayBasedIndexedInts(org.apache.druid.segment.data.ArrayBasedIndexedInts) IndexedInts(org.apache.druid.segment.data.IndexedInts) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector)

Example 44 with IndexedInts

use of org.apache.druid.segment.data.IndexedInts in project druid by druid-io.

the class PooledTopNAlgorithm method scanAndAggregateDefault.

/**
 * Use aggressive loop unrolling to aggregate the data
 *
 * How this works: The aggregates are evaluated AGG_UNROLL_COUNT at a time. This was chosen to be 8 rather arbitrarily.
 * The offsets into the output buffer are precalculated and stored in aggregatorOffsets
 *
 * For queries whose aggregate count is less than AGG_UNROLL_COUNT, the aggregates are evaluated in a switch statement.
 * See http://en.wikipedia.org/wiki/Duff's_device for more information on this kind of approach
 *
 * This allows out of order execution of the code. In local tests, the JVM inlines all the way to this function.
 *
 * If there are more than AGG_UNROLL_COUNT aggregates, then the remainder is calculated with the switch, and the
 * blocks of AGG_UNROLL_COUNT are calculated in a partially unrolled for-loop.
 *
 * Putting the switch first allows for optimization for the common case (less than AGG_UNROLL_COUNT aggs) but
 * still optimizes the high quantity of aggregate queries which benefit greatly from any speed improvements
 * (they simply take longer to start with).
 *
 * NOTE(review): the unrolling visible in this method is over the dimension values of each row; the remainder
 * {@code aggExtra} is only computed here and handed to {@code aggregateDimValue}, which presumably performs the
 * per-aggregator unrolling described above -- confirm against that method.
 *
 * @param params         supplies the cardinality, results buffer, bytes per record, aggregator sizes, cursor and
 *                       dimension selector used by the scan
 * @param positions      passed through unchanged to {@code aggregateDimValue}
 * @param theAggregators passed through to {@code aggregateDimValue}; its length determines aggSize and aggExtra
 * @return the number of cursor rows that were iterated
 * @throws UnsupportedOperationException if {@code params.getCardinality()} is negative
 */
private static long scanAndAggregateDefault(final PooledTopNParams params, final int[] positions, final BufferAggregator[] theAggregators) {
    if (params.getCardinality() < 0) {
        throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality");
    }
    final ByteBuffer resultsBuf = params.getResultsBuf();
    final int numBytesPerRecord = params.getNumBytesPerRecord();
    final int[] aggregatorSizes = params.getAggregatorSizes();
    final Cursor cursor = params.getCursor();
    final DimensionSelector dimSelector = params.getDimSelector();
    // aggregatorOffsets[j] is the running sum of the sizes of aggregators 0..j-1.
    final int[] aggregatorOffsets = new int[aggregatorSizes.length];
    for (int j = 0, offset = 0; j < aggregatorSizes.length; ++j) {
        aggregatorOffsets[j] = offset;
        offset += aggregatorSizes[j];
    }
    final int aggSize = theAggregators.length;
    final int aggExtra = aggSize % AGG_UNROLL_COUNT;
    // currentPosition is threaded through every aggregateDimValue call and carried across rows.
    int currentPosition = 0;
    long processedRows = 0;
    while (!cursor.isDoneOrInterrupted()) {
        final IndexedInts dimValues = dimSelector.getRow();
        final int dimSize = dimValues.size();
        // Number of leading values that do not fill a whole unrolled block; handled by the switch below.
        final int dimExtra = dimSize % AGG_UNROLL_COUNT;
        // Entry point is chosen by dimExtra and every case falls through to the ones below it, so exactly
        // the values at indices dimExtra-1 down to 0 are aggregated (in descending index order).
        switch(dimExtra) {
            case 7:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(6), currentPosition);
            // fall through
            case 6:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(5), currentPosition);
            // fall through
            case 5:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(4), currentPosition);
            // fall through
            case 4:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(3), currentPosition);
            // fall through
            case 3:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(2), currentPosition);
            // fall through
            case 2:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(1), currentPosition);
            // fall through
            case 1:
                currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(0), currentPosition);
        }
        // Remaining values, starting at index dimExtra, are consumed in whole blocks. The body is written out
        // eight times by hand, which presumes AGG_UNROLL_COUNT is 8 as stated in the Javadoc above.
        for (int i = dimExtra; i < dimSize; i += AGG_UNROLL_COUNT) {
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 1), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 2), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 3), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 4), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 5), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 6), currentPosition);
            currentPosition = aggregateDimValue(positions, theAggregators, resultsBuf, numBytesPerRecord, aggregatorOffsets, aggSize, aggExtra, dimValues.get(i + 7), currentPosition);
        }
        cursor.advanceUninterruptibly();
        processedRows++;
    }
    return processedRows;
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) HistoricalDimensionSelector(org.apache.druid.segment.historical.HistoricalDimensionSelector) SingleValueHistoricalDimensionSelector(org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector) IndexedInts(org.apache.druid.segment.data.IndexedInts) HistoricalCursor(org.apache.druid.segment.historical.HistoricalCursor) Cursor(org.apache.druid.segment.Cursor) ByteBuffer(java.nio.ByteBuffer)

Example 45 with IndexedInts

use of org.apache.druid.segment.data.IndexedInts in project druid by druid-io.

the class DictionaryEncodedColumnMerger method convertSortedSegmentRowValuesToMergedRowValues.

/**
 * Wraps {@code source} so that the ids in each row are translated through the conversion buffer registered
 * for {@code segmentIndex}.
 *
 * If no conversion buffer exists for the segment, {@code source} is returned as-is. Otherwise {@code source}
 * is cast to {@link DimensionSelector} and a new selector is returned whose {@code getRow()} yields a view
 * that reads the source row on every access and maps each id through the buffer. The returned selector
 * delegates {@code getObject()} and {@code classOfObject()} to the source; its matcher, cardinality and
 * name-lookup methods throw {@link UnsupportedOperationException}.
 */
@Override
public ColumnValueSelector convertSortedSegmentRowValuesToMergedRowValues(int segmentIndex, ColumnValueSelector source) {
    final IntBuffer converter = dimConversions.get(segmentIndex);
    if (converter == null) {
        // No conversion registered for this segment: hand the source back untouched.
        return source;
    }
    final DimensionSelector dimSelector = (DimensionSelector) source;
    // Live view over the source's current row; nothing is cached between calls.
    final IndexedInts remappedRow = new IndexedInts() {

        @Override
        public int get(int index) {
            final int sourceId = dimSelector.getRow().get(index);
            return converter.get(sourceId);
        }

        @Override
        public int size() {
            return dimSelector.getRow().size();
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
            inspector.visit("source", source);
            inspector.visit("converter", converter);
        }
    };
    return new DimensionSelector() {

        // --- supported operations ---

        @Override
        public IndexedInts getRow() {
            return remappedRow;
        }

        @Nullable
        @Override
        public Object getObject() {
            return dimSelector.getObject();
        }

        @Override
        public Class classOfObject() {
            return dimSelector.classOfObject();
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
            inspector.visit("convertedRow", remappedRow);
        }

        // --- unsupported operations: every method below throws ---

        @Override
        public ValueMatcher makeValueMatcher(String value) {
            throw new UnsupportedOperationException();
        }

        @Override
        public ValueMatcher makeValueMatcher(Predicate<String> predicate) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getValueCardinality() {
            throw new UnsupportedOperationException();
        }

        @Nullable
        @Override
        public String lookupName(int id) {
            throw new UnsupportedOperationException();
        }

        @Override
        public boolean nameLookupPossibleInAdvance() {
            throw new UnsupportedOperationException();
        }

        @Nullable
        @Override
        public IdLookup idLookup() {
            throw new UnsupportedOperationException();
        }
    };
}
Also used : IndexedInts(org.apache.druid.segment.data.IndexedInts) IntBuffer(java.nio.IntBuffer) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) Predicate(com.google.common.base.Predicate)

Aggregations

IndexedInts (org.apache.druid.segment.data.IndexedInts)63 DimensionSelector (org.apache.druid.segment.DimensionSelector)22 ValueMatcher (org.apache.druid.query.filter.ValueMatcher)14 Test (org.junit.Test)13 RuntimeShapeInspector (org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector)12 ArrayBasedIndexedInts (org.apache.druid.segment.data.ArrayBasedIndexedInts)12 Cursor (org.apache.druid.segment.Cursor)10 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)8 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)7 Predicate (com.google.common.base.Predicate)6 ByteBuffer (java.nio.ByteBuffer)6 Nullable (javax.annotation.Nullable)6 ColumnSelectorFactory (org.apache.druid.segment.ColumnSelectorFactory)6 BooleanValueMatcher (org.apache.druid.segment.filter.BooleanValueMatcher)6 List (java.util.List)5 Predicates (com.google.common.base.Predicates)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 Arrays (java.util.Arrays)4 NullHandling (org.apache.druid.common.config.NullHandling)4 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)4