Use of org.apache.druid.segment.column.ColumnHolder in the druid project by druid-io.
From the class QueryableIndexVectorColumnSelectorFactory, method makeSingleValueDimensionSelector:
/**
 * Returns a vectorized single-value dimension selector for {@code dimensionSpec}, building it on first request and
 * serving it from {@code singleValueDimensionSelectorCache} afterwards.
 *
 * <ul>
 *   <li>Virtual columns are asked for an index-backed selector first; if they return {@code null}, the selector is
 *       built on top of this factory instead.</li>
 *   <li>A missing column, or one that is not a dictionary-encoded string, yields a selector of nulls.</li>
 *   <li>A column that may have multiple values is rejected.</li>
 *   <li>Otherwise the dictionary-encoded column provides the selector, which is then decorated by the spec.</li>
 * </ul>
 *
 * @param dimensionSpec the dimension to read; must be vectorizable
 * @return the cached or newly created selector
 * @throws ISE if the spec cannot be vectorized, or if the underlying column may be multi-valued
 */
@Override
public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final DimensionSpec dimensionSpec)
{
  if (!dimensionSpec.canVectorize()) {
    throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec);
  }

  // Deliberately get-then-put rather than computeIfAbsent(): building a selector may modify this cache through
  // virtual column references, which triggers a ConcurrentModificationException in JDK 9 and above.
  final SingleValueDimensionVectorSelector cached = singleValueDimensionSelectorCache.get(dimensionSpec);
  if (cached != null) {
    return cached;
  }

  final String columnName = dimensionSpec.getDimension();
  final SingleValueDimensionVectorSelector created;

  if (virtualColumns.exists(columnName)) {
    // Prefer the selector built directly from the index; fall back to one layered on this factory.
    final SingleValueDimensionVectorSelector indexBacked =
        virtualColumns.makeSingleValueDimensionVectorSelector(dimensionSpec, index, offset);
    created = indexBacked != null
              ? indexBacked
              : virtualColumns.makeSingleValueDimensionVectorSelector(dimensionSpec, this);
  } else {
    final ColumnHolder holder = index.getColumnHolder(columnName);

    if (holder == null
        || !holder.getCapabilities().isDictionaryEncoded().isTrue()
        || !holder.getCapabilities().is(ValueType.STRING)) {
      // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls.
      created = NilVectorSelector.create(offset);
    } else if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) {
      // Asking for a single-value dimension selector on a multi-value column gets you an error.
      throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", columnName);
    } else {
      @SuppressWarnings("unchecked")
      final DictionaryEncodedColumn<String> dictionaryEncodedColumn =
          (DictionaryEncodedColumn<String>) getCachedColumn(columnName);

      // dictionaryEncodedColumn is not null because the holder was found to be non-null above
      assert dictionaryEncodedColumn != null;

      created = dimensionSpec.decorate(dictionaryEncodedColumn.makeSingleValueDimensionVectorSelector(offset));
    }
  }

  singleValueDimensionSelectorCache.put(dimensionSpec, created);
  return created;
}
Use of org.apache.druid.segment.column.ColumnHolder in the druid project by druid-io.
From the class QueryableIndexVectorColumnSelectorFactory, method makeMultiValueDimensionSelector:
/**
 * Returns a vectorized multi-value dimension selector for {@code dimensionSpec}, building it on first request and
 * serving it from {@code multiValueDimensionSelectorCache} afterwards.
 *
 * <p>Virtual columns are asked for an index-backed selector first; if they return {@code null}, the selector is
 * built on top of this factory instead. For physical columns, the selector comes from the dictionary-encoded
 * column and is decorated by the spec.
 *
 * @param dimensionSpec the dimension to read; must be vectorizable
 * @return the cached or newly created selector
 * @throws ISE if the spec cannot be vectorized, or if the column is missing, is reported as not dictionary-encoded,
 *             is not a string column, or is reported as not having multiple values
 */
@Override
public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final DimensionSpec dimensionSpec)
{
  if (!dimensionSpec.canVectorize()) {
    throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec);
  }

  // Deliberately get-then-put rather than computeIfAbsent(): building a selector may modify this cache through
  // virtual column references, which triggers a ConcurrentModificationException in JDK 9 and above.
  final MultiValueDimensionVectorSelector cached = multiValueDimensionSelectorCache.get(dimensionSpec);
  if (cached != null) {
    return cached;
  }

  final String columnName = dimensionSpec.getDimension();
  final MultiValueDimensionVectorSelector created;

  if (virtualColumns.exists(columnName)) {
    // Prefer the selector built directly from the index; fall back to one layered on this factory.
    final MultiValueDimensionVectorSelector indexBacked =
        virtualColumns.makeMultiValueDimensionVectorSelector(dimensionSpec, index, offset);
    created = indexBacked != null
              ? indexBacked
              : virtualColumns.makeMultiValueDimensionVectorSelector(dimensionSpec, this);
  } else {
    final ColumnHolder holder = index.getColumnHolder(columnName);

    if (holder == null
        || holder.getCapabilities().isDictionaryEncoded().isFalse()
        || !holder.getCapabilities().is(ValueType.STRING)
        || holder.getCapabilities().hasMultipleValues().isFalse()) {
      throw new ISE(
          "Column[%s] is not a multi-value string column, do not ask for a multi-value selector",
          columnName
      );
    }

    @SuppressWarnings("unchecked")
    final DictionaryEncodedColumn<String> dictionaryEncodedColumn =
        (DictionaryEncodedColumn<String>) getCachedColumn(columnName);

    // dictionaryEncodedColumn is not null because the holder was found to be non-null above
    assert dictionaryEncodedColumn != null;

    created = dimensionSpec.decorate(dictionaryEncodedColumn.makeMultiValueDimensionVectorSelector(offset));
  }

  multiValueDimensionSelectorCache.put(dimensionSpec, created);
  return created;
}
Use of org.apache.druid.segment.column.ColumnHolder in the druid project by druid-io.
From the class IndexMergerNullHandlingTest, method testStringColumnNullHandling:
/**
 * Persists and reloads an index for every non-empty subset of a fixed set of "null-like" and non-null values of
 * the string dimension "d", then checks the resulting column:
 *
 * <ul>
 *   <li>if every row in the subset is a null flavor, no column holder exists for "d";</li>
 *   <li>otherwise the dictionary, the multi-value flag, the cardinality, the set of stored rows and the
 *       bitmap index entry for null all match what is computed from the input rows.</li>
 * </ul>
 *
 * Empty strings count as null flavors only when {@code NullHandling.replaceWithDefault()} is true.
 */
@Test
public void testStringColumnNullHandling() throws Exception
{
  List<Map<String, Object>> nonNullFlavors = new ArrayList<>();
  nonNullFlavors.add(ImmutableMap.of("d", "a"));
  nonNullFlavors.add(ImmutableMap.of("d", ImmutableList.of("a", "b")));

  List<Map<String, Object>> nullFlavors = new ArrayList<>();
  Map<String, Object> mMissing = ImmutableMap.of();
  Map<String, Object> mEmptyList = ImmutableMap.of("d", Collections.emptyList());
  // ImmutableMap rejects null values, so the explicit-null row needs a HashMap.
  Map<String, Object> mNull = new HashMap<>();
  mNull.put("d", null);
  Map<String, Object> mEmptyString = ImmutableMap.of("d", "");
  Map<String, Object> mListOfNull = ImmutableMap.of("d", Collections.singletonList(null));
  Map<String, Object> mListOfEmptyString = ImmutableMap.of("d", Collections.singletonList(""));

  nullFlavors.add(mMissing);
  nullFlavors.add(mEmptyList);
  nullFlavors.add(mNull);
  nullFlavors.add(mListOfNull);

  // Whether "" is treated as null depends on the null-handling mode.
  if (NullHandling.replaceWithDefault()) {
    nullFlavors.add(mEmptyString);
    nullFlavors.add(mListOfEmptyString);
  } else {
    nonNullFlavors.add(mEmptyString);
    nonNullFlavors.add(mListOfEmptyString);
  }

  Set<Map<String, Object>> allValues = new HashSet<>();
  allValues.addAll(nonNullFlavors);
  allValues.addAll(nullFlavors);

  for (Set<Map<String, Object>> subset : Sets.powerSet(allValues)) {
    if (subset.isEmpty()) {
      continue;
    }

    final List<Map<String, Object>> subsetList = new ArrayList<>(subset);
    // Failure message shared by every assertion for this subset; subsetList is not modified after this point.
    final String description = subsetList.toString();

    IncrementalIndex toPersist = IncrementalIndexTest.createIndex(new AggregatorFactory[]{});
    for (Map<String, Object> m : subsetList) {
      toPersist.add(new MapBasedInputRow(0L, ImmutableList.of("d"), m));
    }

    final File tempDir = temporaryFolder.newFolder();
    try (QueryableIndex index = indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null))) {
      final ColumnHolder columnHolder = index.getColumnHolder("d");

      if (nullFlavors.containsAll(subsetList)) {
        // all null -> should be missing
        Assert.assertNull(description, columnHolder);
      } else {
        Assert.assertNotNull(description, columnHolder);

        // The column has multiple values if there are any lists with > 1 element in the input set.
        final boolean hasMultipleValues = subsetList
            .stream()
            .anyMatch(m -> m.get("d") instanceof List && ((List<?>) m.get("d")).size() > 1);

        // Compute all unique values, the same way that IndexMerger is expected to do it.
        final Set<String> uniqueValues = new HashSet<>();
        for (Map<String, Object> m : subsetList) {
          final List<String> dValues = normalize(m.get("d"), hasMultipleValues);
          uniqueValues.addAll(dValues);

          if (nullFlavors.contains(m)) {
            uniqueValues.add(null);
          }
        }

        try (final DictionaryEncodedColumn<String> dictionaryColumn =
                 (DictionaryEncodedColumn<String>) columnHolder.getColumn()) {
          // Verify unique values against the dictionary.
          Assert.assertEquals(
              description,
              uniqueValues.stream().sorted(Comparators.naturalNullsFirst()).collect(Collectors.toList()),
              IntStream.range(0, dictionaryColumn.getCardinality())
                       .mapToObj(dictionaryColumn::lookupName)
                       .collect(Collectors.toList())
          );

          Assert.assertEquals(description, hasMultipleValues, dictionaryColumn.hasMultipleValues());
          Assert.assertEquals(description, uniqueValues.size(), dictionaryColumn.getCardinality());

          // Verify the expected set of rows was indexed, ignoring order.
          Assert.assertEquals(
              description,
              ImmutableMultiset.copyOf(
                  subsetList.stream()
                            .map(m -> normalize(m.get("d"), hasMultipleValues))
                            .distinct()
                            .collect(Collectors.toList())
              ),
              ImmutableMultiset.copyOf(
                  IntStream.range(0, index.getNumRows())
                           .mapToObj(rowNumber -> getRow(dictionaryColumn, rowNumber))
                           .distinct()
                           .collect(Collectors.toList())
              )
          );

          // Verify that the bitmap index for null is correct.
          final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();

          // Read through the column to find all the rows that should match null.
          final List<Integer> expectedNullRows = new ArrayList<>();
          for (int i = 0; i < index.getNumRows(); i++) {
            final List<String> row = getRow(dictionaryColumn, i);
            if (row.isEmpty() || row.stream().anyMatch(NullHandling::isNullOrEquivalent)) {
              expectedNullRows.add(i);
            }
          }

          Assert.assertEquals(description, !expectedNullRows.isEmpty(), bitmapIndex.hasNulls());

          if (!expectedNullRows.isEmpty()) {
            // Null is expected at dictionary id 0 whenever it is present.
            Assert.assertEquals(description, 0, bitmapIndex.getIndex(null));

            final ImmutableBitmap nullBitmap = bitmapIndex.getBitmap(bitmapIndex.getIndex(null));
            final List<Integer> actualNullRows = new ArrayList<>();
            final IntIterator iterator = nullBitmap.iterator();
            while (iterator.hasNext()) {
              actualNullRows.add(iterator.next());
            }

            Assert.assertEquals(description, expectedNullRows, actualNullRows);
          } else {
            // No null rows: the bitmap index must not know about null at all.
            Assert.assertEquals(description, -1, bitmapIndex.getIndex(null));
          }
        }
      }
    }
  }
}
Use of org.apache.druid.segment.column.ColumnHolder in the druid project by druid-io.
From the class ColumnSelectorBitmapIndexSelectorTest, method setup:
/**
 * Builds the {@code bitmapIndexSelector} under test on top of mocks that expose two physical columns and no
 * virtual columns:
 *
 * <ul>
 *   <li>{@code STRING_DICTIONARY_COLUMN_NAME}: a dictionary-encoded STRING column with a bitmap index in which
 *       "foo" maps to id 0;</li>
 *   <li>{@code NON_STRING_DICTIONARY_COLUMN_NAME}: a dictionary-encoded complex ("testBlob") column; only its
 *       capabilities are stubbed.</li>
 * </ul>
 */
@Before
public void setup()
{
  // Collaborators held in fields.
  bitmapFactory = EasyMock.createMock(BitmapFactory.class);
  virtualColumns = EasyMock.createMock(VirtualColumns.class);
  index = EasyMock.createMock(ColumnSelector.class);
  bitmapIndexSelector = new ColumnSelectorBitmapIndexSelector(bitmapFactory, virtualColumns, index);

  // Mocks that only this fixture needs to reference.
  final ColumnHolder stringHolder = EasyMock.createMock(ColumnHolder.class);
  final StringDictionaryEncodedColumn stringDictionaryColumn =
      EasyMock.createMock(StringDictionaryEncodedColumn.class);
  final BitmapIndex stringBitmapIndex = EasyMock.createMock(BitmapIndex.class);
  final ImmutableBitmap fooBitmap = EasyMock.createMock(ImmutableBitmap.class);
  final ColumnHolder complexHolder = EasyMock.createMock(ColumnHolder.class);

  // Neither column is virtual.
  EasyMock.expect(virtualColumns.getVirtualColumn(STRING_DICTIONARY_COLUMN_NAME))
          .andReturn(null)
          .anyTimes();
  EasyMock.expect(virtualColumns.getVirtualColumn(NON_STRING_DICTIONARY_COLUMN_NAME))
          .andReturn(null)
          .anyTimes();

  // Column lookup.
  EasyMock.expect(index.getColumnHolder(STRING_DICTIONARY_COLUMN_NAME))
          .andReturn(stringHolder)
          .anyTimes();
  EasyMock.expect(index.getColumnHolder(NON_STRING_DICTIONARY_COLUMN_NAME))
          .andReturn(complexHolder)
          .anyTimes();

  // String column: capabilities, column and bitmap index.
  EasyMock.expect(stringHolder.getCapabilities())
          .andReturn(
              ColumnCapabilitiesImpl.createDefault()
                                    .setType(ColumnType.STRING)
                                    .setDictionaryEncoded(true)
                                    .setDictionaryValuesUnique(true)
                                    .setDictionaryValuesSorted(true)
                                    .setHasBitmapIndexes(true)
          )
          .anyTimes();
  EasyMock.expect(stringHolder.getColumn()).andReturn(stringDictionaryColumn).anyTimes();
  EasyMock.expect(stringHolder.getBitmapIndex()).andReturn(stringBitmapIndex).anyTimes();
  EasyMock.expect(stringBitmapIndex.getIndex("foo")).andReturn(0).anyTimes();
  EasyMock.expect(stringBitmapIndex.getBitmap(0)).andReturn(fooBitmap).anyTimes();

  // Complex column: capabilities only.
  EasyMock.expect(complexHolder.getCapabilities())
          .andReturn(
              ColumnCapabilitiesImpl.createDefault()
                                    .setType(ColumnType.ofComplex("testBlob"))
                                    .setDictionaryEncoded(true)
                                    .setDictionaryValuesUnique(true)
                                    .setDictionaryValuesSorted(true)
                                    .setHasBitmapIndexes(true)
                                    .setFilterable(true)
          )
          .anyTimes();

  EasyMock.replay(
      bitmapFactory,
      virtualColumns,
      index,
      stringHolder,
      stringDictionaryColumn,
      complexHolder,
      stringBitmapIndex,
      fooBitmap
  );
}
Use of org.apache.druid.segment.column.ColumnHolder in the druid project by druid-io.
From the class ListFilteredVirtualColumnSelectorTest, method testFilterListFilteredVirtualColumnAllowIndex:
/**
 * Checks the bitmap index exposed for an allow-list {@link ListFilteredVirtualColumn} over a mocked underlying
 * index whose values are "a", "b", "c", with "b" and "c" allowed: "a" is not found, "b" and "c" are at ids 0
 * and 1, and the bitmap, bitmap factory and null flag are the ones supplied by the underlying index.
 */
@Test
public void testFilterListFilteredVirtualColumnAllowIndex()
{
  final ListFilteredVirtualColumn allowColumn = new ListFilteredVirtualColumn(
      ALLOW_VIRTUAL_NAME,
      new DefaultDimensionSpec(COLUMN_NAME, COLUMN_NAME, ColumnType.STRING),
      ImmutableSet.of("b", "c"),
      true
  );

  final ColumnSelector columnSelector = EasyMock.createMock(ColumnSelector.class);
  final ColumnHolder columnHolder = EasyMock.createMock(ColumnHolder.class);
  final BitmapIndex underlyingIndex = EasyMock.createMock(BitmapIndex.class);
  final ImmutableBitmap bitmapOfC = EasyMock.createMock(ImmutableBitmap.class);
  final BitmapFactory underlyingFactory = EasyMock.createMock(BitmapFactory.class);

  EasyMock.expect(columnSelector.getColumnHolder(COLUMN_NAME)).andReturn(columnHolder).atLeastOnce();
  EasyMock.expect(columnHolder.getBitmapIndex()).andReturn(underlyingIndex).atLeastOnce();

  // Underlying dictionary: id i maps to underlyingValues[i].
  final String[] underlyingValues = {"a", "b", "c"};
  EasyMock.expect(underlyingIndex.getCardinality()).andReturn(underlyingValues.length).atLeastOnce();
  for (int id = 0; id < underlyingValues.length; id++) {
    EasyMock.expect(underlyingIndex.getValue(id)).andReturn(underlyingValues[id]).atLeastOnce();
  }

  EasyMock.expect(underlyingIndex.getBitmap(2)).andReturn(bitmapOfC).once();
  EasyMock.expect(underlyingIndex.getBitmapFactory()).andReturn(underlyingFactory).once();
  EasyMock.expect(underlyingIndex.hasNulls()).andReturn(true).once();

  EasyMock.replay(columnSelector, columnHolder, underlyingIndex, bitmapOfC, underlyingFactory);

  final ColumnSelectorBitmapIndexSelector bitmapIndexSelector = new ColumnSelectorBitmapIndexSelector(
      new RoaringBitmapFactory(),
      VirtualColumns.create(Collections.singletonList(allowColumn)),
      columnSelector
  );

  final SelectorFilter selectorFilter = new SelectorFilter(ALLOW_VIRTUAL_NAME, "a");
  Assert.assertTrue(selectorFilter.shouldUseBitmapIndex(bitmapIndexSelector));

  final BitmapIndex filteredIndex = bitmapIndexSelector.getBitmapIndex(ALLOW_VIRTUAL_NAME);

  // Value -> id lookups through the filtered view.
  Assert.assertEquals(-1, filteredIndex.getIndex("a"));
  Assert.assertEquals(0, filteredIndex.getIndex("b"));
  Assert.assertEquals(1, filteredIndex.getIndex("c"));

  // Id -> value lookups and cardinality of the filtered view.
  Assert.assertEquals(2, filteredIndex.getCardinality());
  Assert.assertEquals("b", filteredIndex.getValue(0));
  Assert.assertEquals("c", filteredIndex.getValue(1));

  // Filtered id 1 ("c") is served by the underlying bitmap recorded for underlying id 2.
  Assert.assertEquals(bitmapOfC, filteredIndex.getBitmap(1));
  Assert.assertEquals(underlyingFactory, filteredIndex.getBitmapFactory());
  Assert.assertTrue(filteredIndex.hasNulls());

  EasyMock.verify(columnSelector, columnHolder, underlyingIndex, bitmapOfC, underlyingFactory);
}
Aggregations