Search in sources :

Example 16 with BitmapIndex

use of org.apache.druid.segment.column.BitmapIndex in project druid by druid-io.

the class IndexMergerNullHandlingTest method testStringColumnNullHandling.

/**
 * Checks how the different "flavors" of null and non-null string input survive a persist/load
 * round trip through {@code indexMerger} and {@code indexIO}.
 *
 * <p>Every non-empty subset of the flavor set is indexed into a fresh incremental index, persisted,
 * and reloaded. If the subset contains only null flavors, the column "d" must be absent. Otherwise
 * the dictionary contents, the multi-value flag, the cardinality, the stored rows and the bitmap
 * index entry for null are all compared against values computed from the input rows.
 *
 * @throws Exception if creating the temporary folder, persisting, or loading the index fails
 */
@Test
public void testStringColumnNullHandling() throws Exception {
    List<Map<String, Object>> nonNullFlavors = new ArrayList<>();
    nonNullFlavors.add(ImmutableMap.of("d", "a"));
    nonNullFlavors.add(ImmutableMap.of("d", ImmutableList.of("a", "b")));
    List<Map<String, Object>> nullFlavors = new ArrayList<>();
    Map<String, Object> mMissing = ImmutableMap.of();
    Map<String, Object> mEmptyList = ImmutableMap.of("d", Collections.emptyList());
    // ImmutableMap rejects null values, so a plain HashMap is needed for the explicit-null row.
    Map<String, Object> mNull = new HashMap<>();
    mNull.put("d", null);
    Map<String, Object> mEmptyString = ImmutableMap.of("d", "");
    Map<String, Object> mListOfNull = ImmutableMap.of("d", Collections.singletonList(null));
    Map<String, Object> mListOfEmptyString = ImmutableMap.of("d", Collections.singletonList(""));
    nullFlavors.add(mMissing);
    nullFlavors.add(mEmptyList);
    nullFlavors.add(mNull);
    nullFlavors.add(mListOfNull);
    // Empty strings are treated as a null flavor only when NullHandling.replaceWithDefault() is true.
    if (NullHandling.replaceWithDefault()) {
        nullFlavors.add(mEmptyString);
        nullFlavors.add(mListOfEmptyString);
    } else {
        nonNullFlavors.add(mEmptyString);
        nonNullFlavors.add(mListOfEmptyString);
    }
    Set<Map<String, Object>> allValues = new HashSet<>();
    allValues.addAll(nonNullFlavors);
    allValues.addAll(nullFlavors);
    for (Set<Map<String, Object>> subset : Sets.powerSet(allValues)) {
        if (subset.isEmpty()) {
            continue;
        }
        final List<Map<String, Object>> subsetList = new ArrayList<>(subset);
        // Every assertion carries the input subset so a failure can be traced back to its rows.
        final String message = subsetList.toString();
        IncrementalIndex toPersist = IncrementalIndexTest.createIndex(new AggregatorFactory[] {});
        for (Map<String, Object> m : subsetList) {
            toPersist.add(new MapBasedInputRow(0L, ImmutableList.of("d"), m));
        }
        final File tempDir = temporaryFolder.newFolder();
        try (QueryableIndex index = indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null))) {
            final ColumnHolder columnHolder = index.getColumnHolder("d");
            if (nullFlavors.containsAll(subsetList)) {
                // all null -> should be missing
                Assert.assertNull(message, columnHolder);
            } else {
                Assert.assertNotNull(message, columnHolder);
                // The column has multiple values if there are any lists with > 1 element in the input set.
                final boolean hasMultipleValues = subsetList.stream().anyMatch(m -> m.get("d") instanceof List && (((List<?>) m.get("d")).size() > 1));
                // Compute all unique values, the same way that IndexMerger is expected to do it.
                final Set<String> uniqueValues = new HashSet<>();
                for (Map<String, Object> m : subsetList) {
                    final List<String> dValues = normalize(m.get("d"), hasMultipleValues);
                    uniqueValues.addAll(dValues);
                    if (nullFlavors.contains(m)) {
                        uniqueValues.add(null);
                    }
                }
                try (final DictionaryEncodedColumn<String> dictionaryColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn()) {
                    // Verify unique values against the dictionary.
                    Assert.assertEquals(message, uniqueValues.stream().sorted(Comparators.naturalNullsFirst()).collect(Collectors.toList()), IntStream.range(0, dictionaryColumn.getCardinality()).mapToObj(dictionaryColumn::lookupName).collect(Collectors.toList()));
                    Assert.assertEquals(message, hasMultipleValues, dictionaryColumn.hasMultipleValues());
                    Assert.assertEquals(message, uniqueValues.size(), dictionaryColumn.getCardinality());
                    // Verify the expected set of rows was indexed, ignoring order.
                    Assert.assertEquals(message, ImmutableMultiset.copyOf(subsetList.stream().map(m -> normalize(m.get("d"), hasMultipleValues)).distinct().collect(Collectors.toList())), ImmutableMultiset.copyOf(IntStream.range(0, index.getNumRows()).mapToObj(rowNumber -> getRow(dictionaryColumn, rowNumber)).distinct().collect(Collectors.toList())));
                    // Verify that the bitmap index for null is correct.
                    final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
                    // Read through the column to find all the rows that should match null.
                    final List<Integer> expectedNullRows = new ArrayList<>();
                    for (int i = 0; i < index.getNumRows(); i++) {
                        final List<String> row = getRow(dictionaryColumn, i);
                        if (row.isEmpty() || row.stream().anyMatch(NullHandling::isNullOrEquivalent)) {
                            expectedNullRows.add(i);
                        }
                    }
                    Assert.assertEquals(message, !expectedNullRows.isEmpty(), bitmapIndex.hasNulls());
                    final int nullDictionaryId = bitmapIndex.getIndex(null);
                    if (!expectedNullRows.isEmpty()) {
                        // When nulls are present, null is expected to occupy dictionary id 0.
                        Assert.assertEquals(message, 0, nullDictionaryId);
                        final ImmutableBitmap nullBitmap = bitmapIndex.getBitmap(nullDictionaryId);
                        final List<Integer> actualNullRows = new ArrayList<>();
                        final IntIterator iterator = nullBitmap.iterator();
                        while (iterator.hasNext()) {
                            actualNullRows.add(iterator.next());
                        }
                        Assert.assertEquals(message, expectedNullRows, actualNullRows);
                    } else {
                        // No null rows: the lookup for null must report "not found" (-1).
                        Assert.assertEquals(message, -1, nullDictionaryId);
                    }
                }
            }
        }
    }
}
Also used : IntIterator(org.roaringbitmap.IntIterator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) HashSet(java.util.HashSet) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) DictionaryEncodedColumn(org.apache.druid.segment.column.DictionaryEncodedColumn) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) File(java.io.File) IncrementalIndexTest(org.apache.druid.segment.data.IncrementalIndexTest) Test(org.junit.Test)

Example 17 with BitmapIndex

use of org.apache.druid.segment.column.BitmapIndex in project druid by druid-io.

the class ColumnSelectorBitmapIndexSelectorTest method setup.

/**
 * Builds the selector under test on top of EasyMock doubles.
 *
 * <p>Two columns are exposed through the mocked {@code ColumnSelector}: a STRING dictionary-encoded
 * column that has a bitmap index containing the value "foo" at id 0, and a complex-typed
 * ("testBlob") dictionary-encoded column. Neither name resolves to a virtual column.
 */
@Before
public void setup() {
    // Collaborators handed directly to the selector under test.
    bitmapFactory = EasyMock.createMock(BitmapFactory.class);
    virtualColumns = EasyMock.createMock(VirtualColumns.class);
    index = EasyMock.createMock(ColumnSelector.class);
    bitmapIndexSelector = new ColumnSelectorBitmapIndexSelector(bitmapFactory, virtualColumns, index);

    // Doubles for the column internals.
    final ColumnHolder stringHolder = EasyMock.createMock(ColumnHolder.class);
    final StringDictionaryEncodedColumn stringDictionaryColumn = EasyMock.createMock(StringDictionaryEncodedColumn.class);
    final BitmapIndex fooIndex = EasyMock.createMock(BitmapIndex.class);
    final ImmutableBitmap fooBitmap = EasyMock.createMock(ImmutableBitmap.class);
    final ColumnHolder complexHolder = EasyMock.createMock(ColumnHolder.class);

    // Both column names are physical columns, not virtual ones.
    EasyMock.expect(virtualColumns.getVirtualColumn(STRING_DICTIONARY_COLUMN_NAME))
            .andReturn(null)
            .anyTimes();
    EasyMock.expect(virtualColumns.getVirtualColumn(NON_STRING_DICTIONARY_COLUMN_NAME))
            .andReturn(null)
            .anyTimes();

    // Column lookup by name.
    EasyMock.expect(index.getColumnHolder(STRING_DICTIONARY_COLUMN_NAME))
            .andReturn(stringHolder)
            .anyTimes();
    EasyMock.expect(index.getColumnHolder(NON_STRING_DICTIONARY_COLUMN_NAME))
            .andReturn(complexHolder)
            .anyTimes();

    // STRING column: dictionary encoded, with a bitmap index.
    EasyMock.expect(stringHolder.getCapabilities())
            .andReturn(
                ColumnCapabilitiesImpl.createDefault()
                                      .setType(ColumnType.STRING)
                                      .setDictionaryEncoded(true)
                                      .setDictionaryValuesUnique(true)
                                      .setDictionaryValuesSorted(true)
                                      .setHasBitmapIndexes(true)
            )
            .anyTimes();
    EasyMock.expect(stringHolder.getColumn())
            .andReturn(stringDictionaryColumn)
            .anyTimes();
    EasyMock.expect(stringHolder.getBitmapIndex())
            .andReturn(fooIndex)
            .anyTimes();

    // The bitmap index knows a single value, "foo", stored at id 0.
    EasyMock.expect(fooIndex.getIndex("foo"))
            .andReturn(0)
            .anyTimes();
    EasyMock.expect(fooIndex.getBitmap(0))
            .andReturn(fooBitmap)
            .anyTimes();

    // Complex-typed column: advertises dictionary encoding and bitmap indexes, and is filterable.
    EasyMock.expect(complexHolder.getCapabilities())
            .andReturn(
                ColumnCapabilitiesImpl.createDefault()
                                      .setType(ColumnType.ofComplex("testBlob"))
                                      .setDictionaryEncoded(true)
                                      .setDictionaryValuesUnique(true)
                                      .setDictionaryValuesSorted(true)
                                      .setHasBitmapIndexes(true)
                                      .setFilterable(true)
            )
            .anyTimes();

    EasyMock.replay(
        bitmapFactory,
        virtualColumns,
        index,
        stringHolder,
        stringDictionaryColumn,
        complexHolder,
        fooIndex,
        fooBitmap
    );
}
Also used : ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) BitmapFactory(org.apache.druid.collections.bitmap.BitmapFactory) StringDictionaryEncodedColumn(org.apache.druid.segment.column.StringDictionaryEncodedColumn) Before(org.junit.Before)

Example 18 with BitmapIndex

use of org.apache.druid.segment.column.BitmapIndex in project druid by druid-io.

the class ColumnSelectorBitmapIndexSelectorTest method testNonStringDictionaryDoNotUseIndex.

/**
 * For the non-string dictionary column, the selector is expected to return null from all three
 * lookups: the bitmap index, the dimension values, and the per-value bitmap for "foo".
 */
@Test
public void testNonStringDictionaryDoNotUseIndex() {
    // Whole-column bitmap index.
    Assert.assertNull(bitmapIndexSelector.getBitmapIndex(NON_STRING_DICTIONARY_COLUMN_NAME));

    // Dictionary values.
    Assert.assertNull(bitmapIndexSelector.getDimensionValues(NON_STRING_DICTIONARY_COLUMN_NAME));

    // Bitmap for a single value.
    Assert.assertNull(bitmapIndexSelector.getBitmapIndex(NON_STRING_DICTIONARY_COLUMN_NAME, "foo"));

    EasyMock.verify(bitmapFactory, virtualColumns, index);
}
Also used : ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) Test(org.junit.Test)

Example 19 with BitmapIndex

use of org.apache.druid.segment.column.BitmapIndex in project druid by druid-io.

the class ListFilteredVirtualColumnSelectorTest method testFilterListFilteredVirtualColumnAllowIndex.

/**
 * Exercises the bitmap index exposed for an allow-list {@code ListFilteredVirtualColumn}.
 *
 * <p>The underlying mocked index holds the values "a", "b", "c" (ids 0..2); the virtual column is
 * built with the value set {"b", "c"} and the flag {@code true}. The test asserts that the index
 * seen through the virtual column contains only "b" and "c", re-numbered from 0, and that bitmap,
 * bitmap factory and null information come from the underlying index.
 */
@Test
public void testFilterListFilteredVirtualColumnAllowIndex() {
    final ListFilteredVirtualColumn allowListColumn = new ListFilteredVirtualColumn(
        ALLOW_VIRTUAL_NAME,
        new DefaultDimensionSpec(COLUMN_NAME, COLUMN_NAME, ColumnType.STRING),
        ImmutableSet.of("b", "c"),
        true
    );

    final ColumnSelector columnSelector = EasyMock.createMock(ColumnSelector.class);
    final ColumnHolder columnHolder = EasyMock.createMock(ColumnHolder.class);
    final BitmapIndex underlyingIndex = EasyMock.createMock(BitmapIndex.class);
    final ImmutableBitmap valueBitmap = EasyMock.createMock(ImmutableBitmap.class);
    final BitmapFactory underlyingFactory = EasyMock.createMock(BitmapFactory.class);

    // Underlying column: dictionary {0: "a", 1: "b", 2: "c"}.
    EasyMock.expect(columnSelector.getColumnHolder(COLUMN_NAME))
            .andReturn(columnHolder)
            .atLeastOnce();
    EasyMock.expect(columnHolder.getBitmapIndex())
            .andReturn(underlyingIndex)
            .atLeastOnce();
    EasyMock.expect(underlyingIndex.getCardinality())
            .andReturn(3)
            .atLeastOnce();
    EasyMock.expect(underlyingIndex.getValue(0))
            .andReturn("a")
            .atLeastOnce();
    EasyMock.expect(underlyingIndex.getValue(1))
            .andReturn("b")
            .atLeastOnce();
    EasyMock.expect(underlyingIndex.getValue(2))
            .andReturn("c")
            .atLeastOnce();
    // These three are each expected exactly once.
    EasyMock.expect(underlyingIndex.getBitmap(2))
            .andReturn(valueBitmap)
            .once();
    EasyMock.expect(underlyingIndex.getBitmapFactory())
            .andReturn(underlyingFactory)
            .once();
    EasyMock.expect(underlyingIndex.hasNulls())
            .andReturn(true)
            .once();
    EasyMock.replay(columnSelector, columnHolder, underlyingIndex, valueBitmap, underlyingFactory);

    final ColumnSelectorBitmapIndexSelector indexSelector = new ColumnSelectorBitmapIndexSelector(
        new RoaringBitmapFactory(),
        VirtualColumns.create(Collections.singletonList(allowListColumn)),
        columnSelector
    );

    final SelectorFilter selectorFilter = new SelectorFilter(ALLOW_VIRTUAL_NAME, "a");
    Assert.assertTrue(selectorFilter.shouldUseBitmapIndex(indexSelector));

    final BitmapIndex filteredIndex = indexSelector.getBitmapIndex(ALLOW_VIRTUAL_NAME);

    // "a" is not in the allowed set; "b" and "c" are re-numbered starting at 0.
    Assert.assertEquals(-1, filteredIndex.getIndex("a"));
    Assert.assertEquals(0, filteredIndex.getIndex("b"));
    Assert.assertEquals(1, filteredIndex.getIndex("c"));
    Assert.assertEquals(2, filteredIndex.getCardinality());
    Assert.assertEquals("b", filteredIndex.getValue(0));
    Assert.assertEquals("c", filteredIndex.getValue(1));

    // Filtered id 1 ("c") must resolve to the bitmap stored under underlying id 2.
    Assert.assertEquals(valueBitmap, filteredIndex.getBitmap(1));
    Assert.assertEquals(underlyingFactory, filteredIndex.getBitmapFactory());
    Assert.assertTrue(filteredIndex.hasNulls());

    EasyMock.verify(columnSelector, columnHolder, underlyingIndex, valueBitmap, underlyingFactory);
}
Also used : ColumnSelectorBitmapIndexSelector(org.apache.druid.segment.ColumnSelectorBitmapIndexSelector) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) SelectorFilter(org.apache.druid.segment.filter.SelectorFilter) ColumnSelector(org.apache.druid.segment.ColumnSelector) ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) BitmapFactory(org.apache.druid.collections.bitmap.BitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 20 with BitmapIndex

use of org.apache.druid.segment.column.BitmapIndex in project druid by druid-io.

the class QueryableIndexIndexableAdapter method getBitmapIndex.

/**
 * Looks up the bitmap for a single value of a dimension.
 *
 * @param dimension name of the column to look up in {@code input}
 * @param value     dictionary value whose bitmap is wanted
 * @return {@link BitmapValues#EMPTY} when the column does not exist or has no bitmap index;
 *         otherwise the bitmap the index returns for {@code value}, wrapped in
 *         {@link ImmutableBitmapValues}
 */
@VisibleForTesting
BitmapValues getBitmapIndex(String dimension, String value) {
    final ColumnHolder holder = input.getColumnHolder(dimension);
    // A missing column and a column without a bitmap index are handled the same way.
    final BitmapIndex dimensionIndex = holder == null ? null : holder.getBitmapIndex();
    if (dimensionIndex == null) {
        return BitmapValues.EMPTY;
    }
    // NOTE(review): the id is passed straight to getBitmap(); presumably the index tolerates the
    // "not found" id for unknown values - confirm against the BitmapIndex contract.
    final int valueId = dimensionIndex.getIndex(value);
    return new ImmutableBitmapValues(dimensionIndex.getBitmap(valueId));
}
Also used : ColumnHolder(org.apache.druid.segment.column.ColumnHolder) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) ImmutableBitmapValues(org.apache.druid.segment.data.ImmutableBitmapValues) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

- BitmapIndex (org.apache.druid.segment.column.BitmapIndex): 21
- ColumnHolder (org.apache.druid.segment.column.ColumnHolder): 10
- ImmutableBitmap (org.apache.druid.collections.bitmap.ImmutableBitmap): 9
- BitmapFactory (org.apache.druid.collections.bitmap.BitmapFactory): 8
- RoaringBitmapFactory (org.apache.druid.collections.bitmap.RoaringBitmapFactory): 6
- Test (org.junit.Test): 6
- IOException (java.io.IOException): 4
- MutableBitmap (org.apache.druid.collections.bitmap.MutableBitmap): 4
- BitmapSerdeFactory (org.apache.druid.segment.data.BitmapSerdeFactory): 4
- RoaringBitmapSerdeFactory (org.apache.druid.segment.data.RoaringBitmapSerdeFactory): 4
- Function (com.google.common.base.Function): 3
- StringBitmapIndexColumnPartSupplier (org.apache.druid.segment.serde.StringBitmapIndexColumnPartSupplier): 3
- InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 3
- Setup (org.openjdk.jmh.annotations.Setup): 3
- UncheckedIOException (java.io.UncheckedIOException): 2
- ArrayList (java.util.ArrayList): 2
- List (java.util.List): 2
- Nullable (javax.annotation.Nullable): 2
- ConciseBitmapFactory (org.apache.druid.collections.bitmap.ConciseBitmapFactory): 2
- DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 2