use of com.linkedin.pinot.core.common.DataSourceMetadata in project pinot by linkedin.
the class DictionaryToRawIndexConverter method convertOneColumn.
/**
 * Helper method to perform the conversion for a specific column.
 *
 * @param segment Input segment to convert
 * @param column Column to convert
 * @param newSegment Directory where the raw index is to be written
 * @throws IOException
 */
private void convertOneColumn(IndexSegment segment, String column, File newSegment) throws IOException {
  DataSource dataSource = segment.getDataSource(column);
  Dictionary dictionary = dataSource.getDictionary();
  if (dictionary == null) {
    LOGGER.error("Column '{}' does not have dictionary, cannot convert to raw index.", column);
    return;
  }
  DataSourceMetadata dataSourceMetadata = dataSource.getDataSourceMetadata();
  if (!dataSourceMetadata.isSingleValue()) {
    LOGGER.error("Cannot convert multi-valued columns '{}'", column);
    return;
  }
  int totalDocs = segment.getSegmentMetadata().getTotalDocs();
  BlockSingleValIterator bvIter =
      (BlockSingleValIterator) dataSource.getNextBlock().getBlockValueSet().iterator();
  FieldSpec.DataType dataType = dataSourceMetadata.getDataType();
  int lengthOfLongestEntry =
      (dataType == FieldSpec.DataType.STRING) ? getLengthOfLongestEntry(bvIter, dictionary) : -1;
  SingleValueRawIndexCreator rawIndexCreator =
      SegmentColumnarIndexCreator.getRawIndexCreatorForColumn(newSegment, column, dataType, totalDocs,
          lengthOfLongestEntry);
  int docId = 0;
  bvIter.reset();
  while (bvIter.hasNext()) {
    int dictId = bvIter.nextIntVal();
    Object value = dictionary.get(dictId);
    rawIndexCreator.index(docId++, value);
    if (docId % 1000000 == 0) {
      LOGGER.info("Converted {} records.", docId);
    }
  }
  rawIndexCreator.close();
  deleteForwardIndex(newSegment.getParentFile(), column, dataSourceMetadata.isSorted());
}
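The conversion above simply replaces each dictionary id in the forward index with the value it maps to, then writes those values out as the raw index. The following standalone sketch illustrates that mapping outside of Pinot; the dictionary array, the per-document dictIds, and rawValues are hypothetical stand-ins for the Dictionary, BlockSingleValIterator and SingleValueRawIndexCreator used above.

import java.util.Arrays;

public class DictionaryDecodeSketch {
  public static void main(String[] args) {
    // Hypothetical dictionary: dictId -> value (stands in for Dictionary.get(dictId)).
    String[] dictionary = {"apple", "banana", "cherry"};
    // Hypothetical forward index: one dictId per document (stands in for bvIter.nextIntVal()).
    int[] dictIds = {2, 0, 0, 1, 2};

    // Raw index: store the values themselves instead of dictionary ids.
    String[] rawValues = new String[dictIds.length];
    for (int docId = 0; docId < dictIds.length; docId++) {
      rawValues[docId] = dictionary[dictIds[docId]];
    }
    System.out.println(Arrays.toString(rawValues)); // [cherry, apple, apple, banana, cherry]
  }
}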
use of com.linkedin.pinot.core.common.DataSourceMetadata in project pinot by linkedin.
the class SelectionOperatorUtils method extractDataSchema.
/**
 * Extract the {@link DataSchema} from the sort sequence, selection columns and {@link IndexSegment}. (Inner segment)
 * <p>Inside the data schema, each column is stored only once (de-duplicated).
 *
 * @param sortSequence sort sequence.
 * @param selectionColumns selection columns.
 * @param indexSegment index segment.
 * @return data schema.
 */
@Nonnull
public static DataSchema extractDataSchema(@Nullable List<SelectionSort> sortSequence,
    @Nonnull List<String> selectionColumns, @Nonnull IndexSegment indexSegment) {
  List<String> columnList = new ArrayList<>();
  Set<String> columnSet = new HashSet<>();
  if (sortSequence != null) {
    for (SelectionSort selectionSort : sortSequence) {
      String column = selectionSort.getColumn();
      columnList.add(column);
      columnSet.add(column);
    }
  }
  for (String column : selectionColumns) {
    if (!columnSet.contains(column)) {
      columnList.add(column);
      columnSet.add(column);
    }
  }
  int numColumns = columnList.size();
  String[] columns = new String[numColumns];
  DataType[] dataTypes = new DataType[numColumns];
  for (int i = 0; i < numColumns; i++) {
    String column = columnList.get(i);
    columns[i] = column;
    DataSourceMetadata columnMetadata = indexSegment.getDataSource(column).getDataSourceMetadata();
    if (columnMetadata.isSingleValue()) {
      dataTypes[i] = columnMetadata.getDataType();
    } else {
      dataTypes[i] = columnMetadata.getDataType().toMultiValue();
    }
  }
  return new DataSchema(columns, dataTypes);
}
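The de-duplication above keeps the first occurrence of each column, with sort columns ordered before selection-only columns. Here is a minimal, standalone sketch of that ordering using a LinkedHashSet in place of the explicit list/set pair; the column names are hypothetical.

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;

public class ColumnOrderSketch {
  public static void main(String[] args) {
    // Hypothetical inputs: columns used in ORDER BY, then columns in the SELECT list.
    List<String> sortColumns = Arrays.asList("timeCol", "dimA");
    List<String> selectionColumns = Arrays.asList("dimA", "metricX", "timeCol", "dimB");

    // LinkedHashSet keeps insertion order and drops duplicates,
    // mirroring the columnList/columnSet pair above.
    LinkedHashSet<String> columns = new LinkedHashSet<>();
    columns.addAll(sortColumns);
    columns.addAll(selectionColumns);
    System.out.println(columns); // [timeCol, dimA, metricX, dimB]
  }
}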
use of com.linkedin.pinot.core.common.DataSourceMetadata in project pinot by linkedin.
the class FilterPlanNode method constructPhysicalOperator.
/**
 * Helper method to build the operator tree from the filter query tree.
 *
 * @param filterQueryTree Filter query tree to convert
 * @param segment Index segment
 * @param optimizeAlwaysFalse Whether to optimize always-false (result-empty) predicates
 * @return Filter operator created
 */
@VisibleForTesting
public static BaseFilterOperator constructPhysicalOperator(FilterQueryTree filterQueryTree, IndexSegment segment,
    boolean optimizeAlwaysFalse) {
  BaseFilterOperator ret;
  if (null == filterQueryTree) {
    return new MatchEntireSegmentOperator(segment.getSegmentMetadata().getTotalRawDocs());
  }
  final List<FilterQueryTree> childFilters = filterQueryTree.getChildren();
  final boolean isLeaf = (childFilters == null) || childFilters.isEmpty();
  if (!isLeaf) {
    int numChildrenAlwaysFalse = 0;
    int numChildren = childFilters.size();
    List<BaseFilterOperator> operators = new ArrayList<>();
    final FilterOperator filterType = filterQueryTree.getOperator();
    for (final FilterQueryTree query : childFilters) {
      BaseFilterOperator childOperator = constructPhysicalOperator(query, segment, optimizeAlwaysFalse);
      // Count the number of always-false children.
      if (optimizeAlwaysFalse && childOperator.isResultEmpty()) {
        numChildrenAlwaysFalse++;
        // Early bailout for 'AND' as soon as one of the children always evaluates to false.
        if (filterType == FilterOperator.AND) {
          break;
        }
      }
      operators.add(childOperator);
    }
    ret = buildNonLeafOperator(filterType, operators, numChildrenAlwaysFalse, numChildren, optimizeAlwaysFalse);
  } else {
    final FilterOperator filterType = filterQueryTree.getOperator();
    final String column = filterQueryTree.getColumn();
    Predicate predicate = Predicate.newPredicate(filterQueryTree);
    DataSource ds = segment.getDataSource(column);
    DataSourceMetadata dataSourceMetadata = ds.getDataSourceMetadata();
    BaseFilterOperator baseFilterOperator;
    int startDocId = 0;
    // End doc id is inclusive.
    int endDocId = segment.getSegmentMetadata().getTotalRawDocs() - 1;
    if (dataSourceMetadata.hasInvertedIndex()) {
      // Range evaluation based on the inverted index is inefficient, so use it only if the predicate is NOT a range.
      if (!filterType.equals(FilterOperator.RANGE)) {
        if (dataSourceMetadata.isSingleValue() && dataSourceMetadata.isSorted()) {
          // If the column is sorted, use the sorted inverted index based implementation.
          baseFilterOperator = new SortedInvertedIndexBasedFilterOperator(predicate, ds, startDocId, endDocId);
        } else {
          baseFilterOperator = new BitmapBasedFilterOperator(predicate, ds, startDocId, endDocId);
        }
      } else {
        baseFilterOperator = new ScanBasedFilterOperator(predicate, ds, startDocId, endDocId);
      }
    } else {
      baseFilterOperator = new ScanBasedFilterOperator(predicate, ds, startDocId, endDocId);
    }
    ret = baseFilterOperator;
  }
  // If the operator always evaluates to false, return an empty operator.
  if (ret.isResultEmpty()) {
    ret = new EmptyFilterOperator();
  }
  return ret;
}
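The key optimization above is that an AND node can stop building children as soon as one child can never match, and the whole subtree is then replaced with an EmptyFilterOperator. The following standalone sketch shows that short-circuit over a hypothetical Node type; it is not Pinot's FilterQueryTree or BaseFilterOperator, just the same recursion pattern in miniature.

import java.util.Arrays;
import java.util.List;

public class AndShortCircuitSketch {
  // Hypothetical filter node: either a leaf that is (not) always false, or an AND over children.
  static class Node {
    final boolean isLeaf;
    final boolean alwaysFalse;   // only meaningful for leaves
    final List<Node> children;   // only meaningful for AND nodes

    Node(boolean alwaysFalse) { this.isLeaf = true; this.alwaysFalse = alwaysFalse; this.children = null; }
    Node(List<Node> children) { this.isLeaf = false; this.alwaysFalse = false; this.children = children; }
  }

  // Returns true if the (sub)tree can never match any document, bailing out of an AND
  // as soon as one child is always false, like the early bailout above.
  static boolean isResultEmpty(Node node) {
    if (node.isLeaf) {
      return node.alwaysFalse;
    }
    for (Node child : node.children) {
      if (isResultEmpty(child)) {
        return true; // one empty child makes the whole AND empty
      }
    }
    return false;
  }

  public static void main(String[] args) {
    Node and = new Node(Arrays.asList(new Node(false), new Node(true), new Node(false)));
    System.out.println(isResultEmpty(and)); // true
  }
}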
use of com.linkedin.pinot.core.common.DataSourceMetadata in project pinot by linkedin.
the class ScanBasedFilterOperator method nextFilterBlock.
@Override
public BaseFilterBlock nextFilterBlock(BlockId blockId) {
  DataSourceMetadata dataSourceMetadata = dataSource.getDataSourceMetadata();
  FilterBlockDocIdSet docIdSet;
  Block nextBlock = dataSource.nextBlock();
  BlockValSet blockValueSet = nextBlock.getBlockValueSet();
  BlockMetadata blockMetadata = nextBlock.getMetadata();
  if (dataSourceMetadata.isSingleValue()) {
    docIdSet = new ScanBasedSingleValueDocIdSet(dataSource.getOperatorName(), blockValueSet, blockMetadata,
        predicateEvaluator);
  } else {
    docIdSet = new ScanBasedMultiValueDocIdSet(dataSource.getOperatorName(), blockValueSet, blockMetadata,
        predicateEvaluator);
  }
  if (startDocId != null) {
    docIdSet.setStartDocId(startDocId);
  }
  if (endDocId != null) {
    docIdSet.setEndDocId(endDocId);
  }
  return new ScanBlock(docIdSet);
}
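The optional startDocId and endDocId above simply restrict the scan to a sub-range of documents instead of the whole segment. A standalone sketch of that clamping over a plain integer range; numDocs and the bounds are hypothetical values, not Pinot fields.

public class DocIdClampSketch {
  public static void main(String[] args) {
    int numDocs = 10;        // hypothetical segment size
    Integer startDocId = 3;  // may be null, like the field above
    Integer endDocId = 7;    // inclusive, may be null

    int from = (startDocId != null) ? startDocId : 0;
    int to = (endDocId != null) ? endDocId : numDocs - 1;

    // Scan only the clamped range instead of the whole segment.
    for (int docId = from; docId <= to; docId++) {
      System.out.println("scan docId " + docId);
    }
  }
}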
use of com.linkedin.pinot.core.common.DataSourceMetadata in project pinot by linkedin.
the class StarTreeIndexOperator method createChildOperator.
private BaseFilterOperator createChildOperator(int startDocId, int endDocId, String column,
    PredicateEntry predicateEntry) {
  DataSource dataSource = segment.getDataSource(column);
  DataSourceMetadata dataSourceMetadata = dataSource.getDataSourceMetadata();
  BaseFilterOperator childOperator;
  Predicate predicate = predicateEntry.predicate;
  if (dataSourceMetadata.hasInvertedIndex()) {
    if (dataSourceMetadata.isSorted()) {
      childOperator = new SortedInvertedIndexBasedFilterOperator(predicate, dataSource, startDocId, endDocId);
    } else {
      childOperator = new BitmapBasedFilterOperator(predicate, dataSource, startDocId, endDocId);
    }
  } else {
    childOperator = new ScanBasedFilterOperator(predicate, dataSource, startDocId, endDocId);
  }
  return childOperator;
}
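The decision above is purely metadata-driven: prefer the sorted inverted index when the column is sorted, fall back to the bitmap inverted index otherwise, and scan when no inverted index exists. A minimal standalone sketch of that selection; the Strategy enum and pickStrategy are hypothetical stand-ins for the three operator classes.

public class FilterStrategySketch {
  // Hypothetical stand-ins for the three operator flavours chosen above.
  enum Strategy { SORTED_INVERTED_INDEX, BITMAP_INVERTED_INDEX, FULL_SCAN }

  // Mirrors the branching in createChildOperator: sorted inverted index first,
  // then bitmap inverted index, and a scan when no inverted index exists.
  static Strategy pickStrategy(boolean hasInvertedIndex, boolean isSorted) {
    if (hasInvertedIndex) {
      return isSorted ? Strategy.SORTED_INVERTED_INDEX : Strategy.BITMAP_INVERTED_INDEX;
    }
    return Strategy.FULL_SCAN;
  }

  public static void main(String[] args) {
    System.out.println(pickStrategy(true, true));   // SORTED_INVERTED_INDEX
    System.out.println(pickStrategy(true, false));  // BITMAP_INVERTED_INDEX
    System.out.println(pickStrategy(false, false)); // FULL_SCAN
  }
}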