Use of org.apache.druid.segment.vector.VectorOffset in the project druid by druid-io.
Shown below: the method buildVectorized of the class QueryableIndexCursorSequenceBuilder.
/**
 * Builds a vectorized cursor over the rows of {@code index} selected by {@code interval},
 * {@code filterBitmap} and {@code postFilter}.
 *
 * <p>The row range is narrowed with {@code timeSearch} over the time column only when the interval
 * boundary lies inside the data's timestamp range; otherwise the range starts at row 0 and/or ends
 * at {@code index.getNumRows()}. The resulting range is wrapped in a bitmap-backed offset when a
 * {@code filterBitmap} is present, and additionally in a {@link FilteredVectorOffset} when a
 * {@code postFilter} is present.
 *
 * <p>Every resource opened while building is registered with a single {@code Closer} that is handed
 * to the returned cursor. If building fails part-way, that closer is closed here before the failure
 * is rethrown, so already-registered columns are not left open.
 *
 * @param vectorSize vector size passed through to the offsets and to the returned cursor
 * @return a cursor positioned over the selected rows; it receives the closer for everything opened here
 * @throws IllegalStateException if this builder is configured as {@code descending}
 */
public VectorCursor buildVectorized(final int vectorSize) {
  // Sanity check - matches QueryableIndexStorageAdapter.canVectorize
  Preconditions.checkState(!descending, "!descending");
  final Map<String, BaseColumn> columnCache = new HashMap<>();
  final Closer closer = Closer.create();
  try {
    NumericColumn timestamps = null;
    final int startOffset;
    final int endOffset;
    if (interval.getStartMillis() > minDataTimestamp) {
      // The interval starts after the first row's timestamp, so the first row must be searched for.
      timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn();
      closer.register(timestamps);
      startOffset = timeSearch(timestamps, interval.getStartMillis(), 0, index.getNumRows());
    } else {
      startOffset = 0;
    }
    if (interval.getEndMillis() <= maxDataTimestamp) {
      // Reuse the time column if the start-offset search already opened it.
      if (timestamps == null) {
        timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn();
        closer.register(timestamps);
      }
      endOffset = timeSearch(timestamps, interval.getEndMillis(), startOffset, index.getNumRows());
    } else {
      endOffset = index.getNumRows();
    }
    final VectorOffset baseOffset = filterBitmap == null
        ? new NoFilterVectorOffset(vectorSize, startOffset, endOffset)
        : new BitmapVectorOffset(vectorSize, filterBitmap, startOffset, endOffset);
    // baseColumnSelectorFactory using baseOffset is the column selector for filtering.
    final VectorColumnSelectorFactory baseColumnSelectorFactory =
        makeVectorColumnSelectorFactoryForOffset(columnCache, baseOffset, closer);
    if (postFilter == null) {
      return new QueryableIndexVectorCursor(baseColumnSelectorFactory, baseOffset, vectorSize, closer);
    }
    final VectorOffset filteredOffset =
        FilteredVectorOffset.create(baseOffset, baseColumnSelectorFactory, postFilter);
    // Now create the cursor and column selector that will be returned to the caller.
    //
    // There is an inefficiency with how we do things here: this cursor (the one that will be provided to the
    // caller) does share a columnCache with "baseColumnSelectorFactory", but it *doesn't* share vector data. This
    // means that if the caller wants to read from a column that is also used for filtering, the underlying column
    // object will get hit twice for some of the values (anything that matched the filter). This is probably most
    // noticeable if it causes thrashing of decompression buffers due to out-of-order reads. I haven't observed
    // this directly but it seems possible in principle.
    //
    // filteredColumnSelectorFactory using filteredOffset is the column selector handed to the caller.
    final VectorColumnSelectorFactory filteredColumnSelectorFactory =
        makeVectorColumnSelectorFactoryForOffset(columnCache, filteredOffset, closer);
    return new QueryableIndexVectorCursor(filteredColumnSelectorFactory, filteredOffset, vectorSize, closer);
  } catch (Throwable t) {
    // No cursor was returned, so nobody else will ever close what has been registered so far.
    try {
      closer.close();
    } catch (Throwable closeFailure) {
      // Keep the original failure as the primary one; guard against self-suppression.
      if (closeFailure != t) {
        t.addSuppressed(closeFailure);
      }
    }
    // Precise rethrow: only the (unchecked) throwables of the try body can propagate from here.
    throw t;
  }
}
Aggregations