Use of org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector in project druid by druid-io.
From the class ColumnSelectorBitmapIndexSelector, method getDimensionValues:
/**
 * Exposes the value dictionary of {@code dimension} as a {@link CloseableIndexed} of strings.
 *
 * <p>Virtual columns are served from the bitmap index supplied by {@code virtualColumns}; physical
 * columns are served from their {@link DictionaryEncodedColumn}.
 *
 * @param dimension name of the dimension whose values are requested
 * @return an indexed view of the dimension's values, or {@code null} when the virtual column has no bitmap
 *         index, the column does not exist, the column is not of type STRING, or the column is not
 *         dictionary encoded
 */
@Nullable
@Override
public CloseableIndexed<String> getDimensionValues(String dimension)
{
  if (isVirtualColumn(dimension)) {
    final BitmapIndex virtualBitmapIndex = virtualColumns.getBitmapIndex(dimension, index);
    if (virtualBitmapIndex == null) {
      return null;
    }
    return new CloseableIndexed<String>()
    {
      @Override
      public int size()
      {
        return virtualBitmapIndex.getCardinality();
      }

      @Override
      public String get(int position)
      {
        return virtualBitmapIndex.getValue(position);
      }

      @Override
      public int indexOf(String value)
      {
        return virtualBitmapIndex.getIndex(value);
      }

      @Override
      public Iterator<String> iterator()
      {
        return IndexedIterable.create(this).iterator();
      }

      @Override
      public void inspectRuntimeShape(RuntimeShapeInspector inspector)
      {
        inspector.visit("column", virtualBitmapIndex);
      }

      @Override
      public void close()
      {
        // nothing to release for the bitmap-index-backed view
      }
    };
  }

  final ColumnHolder holder = index.getColumnHolder(dimension);
  // Only STRING values can be returned by this method, so columns of any other type are rejected
  // until filtering/indexing of other column types is supported.
  if (holder == null || !holder.getCapabilities().toColumnType().is(ValueType.STRING)) {
    return null;
  }

  final BaseColumn baseColumn = holder.getColumn();
  if (baseColumn instanceof DictionaryEncodedColumn) {
    final DictionaryEncodedColumn<String> dictionaryColumn = (DictionaryEncodedColumn<String>) baseColumn;
    return new CloseableIndexed<String>()
    {
      @Override
      public int size()
      {
        return dictionaryColumn.getCardinality();
      }

      @Override
      public String get(int position)
      {
        return dictionaryColumn.lookupName(position);
      }

      @Override
      public int indexOf(String value)
      {
        return dictionaryColumn.lookupId(value);
      }

      @Override
      public Iterator<String> iterator()
      {
        return IndexedIterable.create(this).iterator();
      }

      @Override
      public void inspectRuntimeShape(RuntimeShapeInspector inspector)
      {
        inspector.visit("column", dictionaryColumn);
      }

      @Override
      public void close() throws IOException
      {
        // closing the view closes the underlying column
        dictionaryColumn.close();
      }
    };
  }

  // Not dictionary encoded: there is no dictionary to expose.
  return null;
}
Use of org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector in project druid by druid-io.
From the class ExpressionVirtualColumnTest, method testMultiObjectSelectorMakesRightSelector:
/**
 * Checks which selector implementation is produced for the two multi-value expression virtual columns
 * ({@code SCALE_LIST_SELF_IMPLICIT} and {@code SCALE_LIST_SELF_EXPLICIT}) when the underlying column
 * reports itself as a dictionary-encoded, multi-value STRING with a known cardinality.
 */
@Test
public void testMultiObjectSelectorMakesRightSelector()
{
  final DimensionSpec spec = new DefaultDimensionSpec("expr", "expr");

  // Ugly faking: wrap the shared factory so that SingleStringInputDeferredEvaluationExpressionDimensionSelector
  // can be created for multi-value expressions when possible.
  final ColumnSelectorFactory fakeFactory = new ColumnSelectorFactory()
  {
    @Override
    public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
    {
      final DimensionSelector backing = COLUMN_SELECTOR_FACTORY.makeDimensionSelector(dimensionSpec);
      // Everything is forwarded to the real selector except the dictionary-related answers below.
      return new DimensionSelector()
      {
        @Override
        public IndexedInts getRow()
        {
          return backing.getRow();
        }

        @Override
        public ValueMatcher makeValueMatcher(@Nullable String value)
        {
          return backing.makeValueMatcher(value);
        }

        @Override
        public ValueMatcher makeValueMatcher(Predicate<String> predicate)
        {
          return backing.makeValueMatcher(predicate);
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector)
        {
          backing.inspectRuntimeShape(inspector);
        }

        @Nullable
        @Override
        public Object getObject()
        {
          return backing.getObject();
        }

        @Override
        public Class<?> classOfObject()
        {
          return backing.classOfObject();
        }

        @Override
        public int getValueCardinality()
        {
          // any value works as long as it is not CARDINALITY_UNKNOWN
          return 3;
        }

        @Nullable
        @Override
        public String lookupName(int id)
        {
          return null;
        }

        @Override
        public boolean nameLookupPossibleInAdvance()
        {
          // faked so that creating SingleStringInputDeferredEvaluationExpressionDimensionSelector does not explode
          return true;
        }

        @Nullable
        @Override
        public IdLookup idLookup()
        {
          return ignoredName -> 0;
        }
      };
    }

    @Override
    public ColumnValueSelector makeColumnValueSelector(String columnName)
    {
      return COLUMN_SELECTOR_FACTORY.makeColumnValueSelector(columnName);
    }

    @Nullable
    @Override
    public ColumnCapabilities getColumnCapabilities(String column)
    {
      // every column is reported as a dictionary-encoded, multi-value STRING
      return new ColumnCapabilitiesImpl()
          .setType(ColumnType.STRING)
          .setHasMultipleValues(true)
          .setDictionaryEncoded(true);
    }
  };

  final BaseObjectColumnValueSelector selectorImplicit =
      SCALE_LIST_SELF_IMPLICIT.makeDimensionSelector(spec, fakeFactory);
  final BaseObjectColumnValueSelector selectorExplicit =
      SCALE_LIST_SELF_EXPLICIT.makeDimensionSelector(spec, fakeFactory);

  Assert.assertTrue(selectorImplicit instanceof SingleStringInputDeferredEvaluationExpressionDimensionSelector);
  Assert.assertTrue(selectorExplicit instanceof ExpressionMultiValueDimensionSelector);
}
Use of org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector in project druid by druid-io.
From the class LargeColumnSupportedComplexColumnSerializerTest, method testSanity:
/**
 * Round-trips HyperLogLog collectors through {@link LargeColumnSupportedComplexColumnSerializer} for several
 * row counts and column-size limits, then verifies that folding the deserialized rows yields the same
 * cardinality estimate as folding the collectors that were written.
 *
 * @throws IOException if creating, writing, mapping or closing the smoosh files fails
 */
@Test
public void testSanity() throws IOException
{
  HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
  int[] cases = {1000, 5000, 10000, 20000};
  int[] columnSizes = {
      Integer.MAX_VALUE,
      Integer.MAX_VALUE / 2,
      Integer.MAX_VALUE / 4,
      5000 * Long.BYTES,
      2500 * Long.BYTES
  };

  for (int columnSize : columnSizes) {
    for (int aCase : cases) {
      File tmpFile = temporaryFolder.newFolder();
      // Accumulates every written collector; this is the expected result after the round trip.
      HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();

      try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
           FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
        LargeColumnSupportedComplexColumnSerializer serializer =
            LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(
                segmentWriteOutMedium,
                "test",
                serde.getObjectStrategy(),
                columnSize
            );
        serializer.open();

        for (int i = 0; i < aCase; i++) {
          HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
          byte[] hashBytes = fn.hashLong(i).asBytes();
          collector.add(hashBytes);
          baseCollector.fold(collector);
          serializer.serialize(new ObjectColumnSelector()
          {
            @Nullable
            @Override
            public Object getObject()
            {
              return collector;
            }

            @Override
            public Class classOfObject()
            {
              return HyperLogLogCollector.class;
            }

            @Override
            public void inspectRuntimeShape(RuntimeShapeInspector inspector)
            {
              // doesn't matter in tests
            }
          });
        }

        try (final SmooshedWriter channel =
                 v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
          serializer.writeTo(channel, v9Smoosher);
        }
      }

      // The mapper is closed once verification is done; previously it was left open on each of the
      // columnSizes x cases iterations.
      try (SmooshedFileMapper mapper = Smoosh.map(tmpFile)) {
        final ColumnBuilder builder = new ColumnBuilder()
            .setType(ValueType.COMPLEX)
            .setHasMultipleValues(false)
            .setFileMapper(mapper);
        serde.deserializeColumn(mapper.mapFile("test"), builder, null);

        ColumnHolder columnHolder = builder.build();
        ComplexColumn complexColumn = (ComplexColumn) columnHolder.getColumn();

        // Fold every deserialized row back together and compare against what was written.
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        for (int i = 0; i < aCase; i++) {
          collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
        }
        Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
      }
    }
  }
}
Use of org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector in project druid by druid-io.
From the class DimensionSelectorUtils, method makeDictionaryEncodedValueMatcherGeneric:
/**
 * Builds a {@link ValueMatcher} that tests the current row of {@code selector} against one dictionary id.
 *
 * @param selector  selector whose current row is inspected on every {@code matches()} call
 * @param valueId   dictionary id to look for; a negative id is handled as "no id can match"
 *                  (presumably meaning the value is absent from the dictionary; confirm against callers)
 * @param matchNull whether a row with no values counts as a match
 * @return a matcher that is true when the row contains {@code valueId}, or when the row is empty and
 *         {@code matchNull} is set; a constant-false matcher when {@code valueId} is negative and
 *         {@code matchNull} is not set
 */
private static ValueMatcher makeDictionaryEncodedValueMatcherGeneric(
    final DimensionSelector selector,
    final int valueId,
    final boolean matchNull
)
{
  if (valueId < 0) {
    if (!matchNull) {
      // No id to find and empty rows do not count: nothing can ever match.
      return BooleanValueMatcher.of(false);
    }
    // No id to find, so only empty rows can match.
    return new ValueMatcher()
    {
      @Override
      public boolean matches()
      {
        return selector.getRow().size() == 0;
      }

      @Override
      public void inspectRuntimeShape(RuntimeShapeInspector inspector)
      {
        inspector.visit("selector", selector);
      }
    };
  }

  return new ValueMatcher()
  {
    @Override
    public boolean matches()
    {
      final IndexedInts ids = selector.getRow();
      final int count = ids.size();
      if (count == 0) {
        // null should match empty rows in multi-value columns
        return matchNull;
      }
      for (int pos = 0; pos < count; pos++) {
        if (ids.get(pos) == valueId) {
          return true;
        }
      }
      return false;
    }

    @Override
    public void inspectRuntimeShape(RuntimeShapeInspector inspector)
    {
      inspector.visit("selector", selector);
    }
  };
}
Use of org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector in project druid by druid-io.
From the class StringDimensionIndexer, method makeDimensionSelector:
/**
 * Creates a {@link DimensionSelector} that reads this dimension's encoded values from the row currently
 * held by {@code currEntry}.
 *
 * @param spec      dimension spec; only its extraction function is read here
 * @param currEntry holder of the row the returned selector reads from
 * @param desc      descriptor providing this dimension's position within a row's dims array
 * @return a selector over dictionary ids below the cardinality observed at creation time; it doubles as
 *         its own {@link IdLookup} when no extraction function is configured
 */
@Override
public DimensionSelector makeDimensionSelector(
    final DimensionSpec spec,
    final IncrementalIndexRowHolder currEntry,
    final IncrementalIndex.DimensionDesc desc
)
{
  final ExtractionFn extractionFn = spec.getExtractionFn();
  final int dimIndex = desc.getIndex();

  // maxId works together with getLastRowIndex() in IncrementalIndex so that callers never see rows
  // containing IDs above the initially-reported cardinality: IncrementalIndex establishes a watermark
  // when a cursor is created and does not let the cursor walk past it.
  //
  // In addition, this selector explicitly hides IDs past maxId that may appear for other reasons
  // (for example: nulls generated for empty arrays, or calls to lookupId).
  final int maxId = getCardinality();

  class IndexerDimensionSelector implements DimensionSelector, IdLookup
  {
    private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts();

    // Lazily-created single-element row holding the null id; reused across getRow() calls.
    @Nullable
    @MonotonicNonNull
    private int[] nullIdIntArray;

    @Override
    public IndexedInts getRow()
    {
      final Object[] dims = currEntry.get().getDims();
      final int[] indices = dimIndex < dims.length ? (int[]) dims[dimIndex] : null;

      final int[] row;
      final int rowSize;
      if (indices != null && indices.length > 0) {
        row = indices;
        rowSize = indices.length;
      } else if (hasMultipleValues) {
        // Missing values here are usually due to currEntry's rowIndex being smaller than the rowIndex
        // of the row in which this dimension first appears.
        row = IntArrays.EMPTY_ARRAY;
        rowSize = 0;
      } else {
        final int nullId = getEncodedValue(null, false);
        if (nullId >= 0 && nullId < maxId) {
          // null was added to the dictionary before this selector was created; return its ID.
          if (nullIdIntArray == null) {
            nullIdIntArray = new int[]{nullId};
          }
          row = nullIdIntArray;
          rowSize = 1;
        } else {
          // null doesn't exist in the dictionary; return an empty array.
          // ArrayBasedIndexedInts is still used below, rather than a special "empty" IndexedInts,
          // for monomorphism.
          row = IntArrays.EMPTY_ARRAY;
          rowSize = 0;
        }
      }

      indexedInts.setValues(row, rowSize);
      return indexedInts;
    }

    @Override
    public ValueMatcher makeValueMatcher(final String value)
    {
      if (extractionFn != null) {
        // Employ caching BitSet optimization
        return makeValueMatcher(Predicates.equalTo(value));
      }

      final int valueId = lookupId(value);
      if (valueId < 0 && value != null) {
        return BooleanValueMatcher.of(false);
      }

      // A row without values for this dimension matches only when null is being looked for.
      final boolean matchesMissing = value == null;
      return new ValueMatcher()
      {
        @Override
        public boolean matches()
        {
          final Object[] dims = currEntry.get().getDims();
          final int[] ids = dimIndex < dims.length ? (int[]) dims[dimIndex] : null;
          if (ids == null || ids.length == 0) {
            return matchesMissing;
          }
          for (int candidate : ids) {
            if (candidate == valueId) {
              return true;
            }
          }
          return false;
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector)
        {
          // nothing to inspect
        }
      };
    }

    @Override
    public ValueMatcher makeValueMatcher(final Predicate<String> predicate)
    {
      // checkedIds records ids whose predicate result is already known; matchingIds records which of
      // those matched.
      final BitSet checkedIds = new BitSet(maxId);
      final BitSet matchingIds = new BitSet(maxId);
      final boolean matchNull = predicate.apply(null);

      // Lazy matcher; only check an id if matches() is called.
      return new ValueMatcher()
      {
        @Override
        public boolean matches()
        {
          final Object[] dims = currEntry.get().getDims();
          final int[] ids = dimIndex < dims.length ? (int[]) dims[dimIndex] : null;
          if (ids == null || ids.length == 0) {
            return matchNull;
          }
          for (int id : ids) {
            if (!checkedIds.get(id)) {
              // First time this id is seen: evaluate the predicate once and remember the outcome.
              final boolean matches = predicate.apply(lookupName(id));
              checkedIds.set(id);
              if (matches) {
                matchingIds.set(id);
              }
            }
            if (matchingIds.get(id)) {
              return true;
            }
          }
          return false;
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector)
        {
          // nothing to inspect
        }
      };
    }

    @Override
    public int getValueCardinality()
    {
      return maxId;
    }

    @Override
    public String lookupName(int id)
    {
      if (id >= maxId) {
        // Sanity check; IDs beyond maxId should not be known to callers. (See the maxId comment above.)
        throw new ISE("id[%d] >= maxId[%d]", id, maxId);
      }
      final String rawValue = getActualValue(id, false);
      if (extractionFn == null) {
        return rawValue;
      }
      return extractionFn.apply(rawValue);
    }

    @Override
    public boolean nameLookupPossibleInAdvance()
    {
      return dictionaryEncodesAllValues();
    }

    @Nullable
    @Override
    public IdLookup idLookup()
    {
      // Id lookup is offered only when no extraction function is configured.
      if (extractionFn != null) {
        return null;
      }
      return this;
    }

    @Override
    public int lookupId(String name)
    {
      if (extractionFn != null) {
        throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
      }
      final int encoded = getEncodedValue(name, false);
      // Ids at or beyond maxId are reported as absent.
      return encoded < maxId ? encoded : DimensionDictionary.ABSENT_VALUE_ID;
    }

    @SuppressWarnings("deprecation")
    @Nullable
    @Override
    public Object getObject()
    {
      final IncrementalIndexRow key = currEntry.get();
      if (key == null) {
        return null;
      }
      final Object[] dims = key.getDims();
      if (dimIndex >= dims.length) {
        return null;
      }
      return convertUnsortedEncodedKeyComponentToActualList((int[]) dims[dimIndex]);
    }

    @SuppressWarnings("deprecation")
    @Override
    public Class classOfObject()
    {
      return Object.class;
    }

    @Override
    public void inspectRuntimeShape(RuntimeShapeInspector inspector)
    {
      // nothing to inspect
    }
  }

  return new IndexerDimensionSelector();
}
Aggregations