Use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.
The class NoDictionaryGroupKeyGeneratorTest, method setup:
@BeforeClass
public void setup() throws Exception {
  buildSegment();

  // Load the segment.
  File segment = new File(SEGMENT_DIR_NAME, SEGMENT_NAME);
  _indexSegment = Loaders.IndexSegment.load(segment, ReadMode.heap);

  // Build the data source map.
  _dataSourceMap = new HashMap<>();
  for (String column : _indexSegment.getColumnNames()) {
    DataSource dataSource = _indexSegment.getDataSource(column);
    _dataSourceMap.put(column, dataSource);
  }
}
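Once built, the map gives per-column access to the segment. A minimal sketch of consuming it, using only DataSource accessors that appear in the other examples on this page; the loop body is illustrative, not part of the test:

// Sketch: inspect each column's data source through its metadata.
for (Map.Entry<String, DataSource> entry : _dataSourceMap.entrySet()) {
  DataSourceMetadata columnMetadata = entry.getValue().getDataSourceMetadata();
  System.out.println(entry.getKey() + " singleValue=" + columnMetadata.isSingleValue());
}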
Use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.
The class FilterPlanNode, method constructPhysicalOperator:
/**
 * Helper method to build the operator tree from the filter query tree.
 *
 * @param filterQueryTree Filter query tree to be translated
 * @param segment Index segment
 * @param optimizeAlwaysFalse Whether to optimize always-false (isResultEmpty) predicates
 * @return Filter operator created
 */
@VisibleForTesting
public static BaseFilterOperator constructPhysicalOperator(FilterQueryTree filterQueryTree, IndexSegment segment,
    boolean optimizeAlwaysFalse) {
  BaseFilterOperator ret;
  if (null == filterQueryTree) {
    return new MatchEntireSegmentOperator(segment.getSegmentMetadata().getTotalRawDocs());
  }
  final List<FilterQueryTree> childFilters = filterQueryTree.getChildren();
  final boolean isLeaf = (childFilters == null) || childFilters.isEmpty();
  if (!isLeaf) {
    int numChildrenAlwaysFalse = 0;
    int numChildren = childFilters.size();
    List<BaseFilterOperator> operators = new ArrayList<>();
    final FilterOperator filterType = filterQueryTree.getOperator();
    for (final FilterQueryTree query : childFilters) {
      BaseFilterOperator childOperator = constructPhysicalOperator(query, segment, optimizeAlwaysFalse);
      // Count the number of always-false children.
      if (optimizeAlwaysFalse && childOperator.isResultEmpty()) {
        numChildrenAlwaysFalse++;
        // Early bailout for 'AND' as soon as one of the children always evaluates to false.
        if (filterType == FilterOperator.AND) {
          break;
        }
      }
      operators.add(childOperator);
    }
    ret = buildNonLeafOperator(filterType, operators, numChildrenAlwaysFalse, numChildren, optimizeAlwaysFalse);
  } else {
    final FilterOperator filterType = filterQueryTree.getOperator();
    final String column = filterQueryTree.getColumn();
    Predicate predicate = Predicate.newPredicate(filterQueryTree);
    DataSource ds = segment.getDataSource(column);
    DataSourceMetadata dataSourceMetadata = ds.getDataSourceMetadata();
    BaseFilterOperator baseFilterOperator;
    int startDocId = 0;
    // End document id is inclusive.
    int endDocId = segment.getSegmentMetadata().getTotalRawDocs() - 1;
    if (dataSourceMetadata.hasInvertedIndex()) {
      // Range evaluation based on the inverted index is inefficient, so use it only for non-range predicates.
      if (!filterType.equals(FilterOperator.RANGE)) {
        if (dataSourceMetadata.isSingleValue() && dataSourceMetadata.isSorted()) {
          // If the column is sorted, use the sorted inverted index based implementation.
          baseFilterOperator = new SortedInvertedIndexBasedFilterOperator(predicate, ds, startDocId, endDocId);
        } else {
          baseFilterOperator = new BitmapBasedFilterOperator(predicate, ds, startDocId, endDocId);
        }
      } else {
        baseFilterOperator = new ScanBasedFilterOperator(predicate, ds, startDocId, endDocId);
      }
    } else {
      baseFilterOperator = new ScanBasedFilterOperator(predicate, ds, startDocId, endDocId);
    }
    ret = baseFilterOperator;
  }
  // If the operator always evaluates to false, simply return an empty operator.
  if (ret.isResultEmpty()) {
    ret = new EmptyFilterOperator();
  }
  return ret;
}
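At the leaf level, the operator choice follows the index layout: a sorted single-value column with an inverted index gets a SortedInvertedIndexBasedFilterOperator, an unsorted one a BitmapBasedFilterOperator, and range predicates or columns without an inverted index fall back to a ScanBasedFilterOperator. A hedged call-site sketch; rootTree and indexSegment are illustrative names, not from the source:

// Sketch: build the physical filter tree with always-false optimization enabled.
BaseFilterOperator filterOperator = FilterPlanNode.constructPhysicalOperator(rootTree, indexSegment, true);
if (filterOperator.isResultEmpty()) {
  // An EmptyFilterOperator was returned: no document in this segment can match.
  System.out.println("Filter matches no documents in this segment.");
}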
Use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.
The class RealtimeSegmentImpl, method getDataSource:
public DataSource getDataSource(String columnName, Predicate p) {
  // Fetch the column's data source and attach the predicate to it.
  DataSource ds = getDataSource(columnName);
  ds.setPredicate(p);
  return ds;
}
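A hedged usage sketch, reusing Predicate.newPredicate(...) from the FilterPlanNode example above; the segment variable and column name are illustrative:

// Sketch: scan a realtime column with a predicate pushed down into its data source.
Predicate predicate = Predicate.newPredicate(filterQueryTree);
DataSource filteredSource = realtimeSegment.getDataSource("dimensionColumn", predicate);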
Use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.
The class IndexSegmentImpl, method iterator:
public Iterator<GenericRow> iterator(final int startDocId, final int endDocId) {
  final Map<String, BlockSingleValIterator> singleValIteratorMap = new HashMap<>();
  final Map<String, BlockMultiValIterator> multiValIteratorMap = new HashMap<>();
  for (String column : getColumnNames()) {
    DataSource dataSource = getDataSource(column);
    BlockValIterator iterator = dataSource.getNextBlock().getBlockValueSet().iterator();
    if (dataSource.getDataSourceMetadata().isSingleValue()) {
      singleValIteratorMap.put(column, (BlockSingleValIterator) iterator);
    } else {
      multiValIteratorMap.put(column, (BlockMultiValIterator) iterator);
    }
  }
  return new Iterator<GenericRow>() {
    int docId = startDocId;

    @Override
    public boolean hasNext() {
      return docId < endDocId;
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }

    @Override
    public GenericRow next() {
      Map<String, Object> map = new HashMap<>();
      for (String column : singleValIteratorMap.keySet()) {
        // Look up the raw value for the dictionary id emitted by the iterator.
        int dictId = singleValIteratorMap.get(column).nextIntVal();
        Dictionary dictionary = getDictionaryFor(column);
        map.put(column, dictionary.get(dictId));
      }
      for (String column : multiValIteratorMap.keySet()) {
        // TODO: handle multi-value columns.
      }
      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      docId++;
      return genericRow;
    }
  };
}
Use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.
The class SegmentDumpTool, method doMain:
public void doMain(String[] args) throws Exception {
  CmdLineParser parser = new CmdLineParser(this);
  parser.parseArgument(args);
  File segmentDir = new File(segmentPath);
  SegmentMetadata metadata = new SegmentMetadataImpl(segmentDir);

  // Dump all columns by default.
  if (columnNames == null) {
    columnNames = new ArrayList<String>(metadata.getSchema().getColumnNames());
    Collections.sort(columnNames);
  }
  IndexSegment indexSegment = Loaders.IndexSegment.load(segmentDir, ReadMode.mmap);

  // Open one dictionary and one single-value iterator per column.
  Map<String, Dictionary> dictionaries = new HashMap<String, Dictionary>();
  Map<String, BlockSingleValIterator> iterators = new HashMap<String, BlockSingleValIterator>();
  for (String columnName : columnNames) {
    DataSource dataSource = indexSegment.getDataSource(columnName);
    dataSource.open();
    Block block = dataSource.nextBlock();
    BlockValSet blockValSet = block.getBlockValueSet();
    BlockSingleValIterator itr = (BlockSingleValIterator) blockValSet.iterator();
    iterators.put(columnName, itr);
    dictionaries.put(columnName, dataSource.getDictionary());
  }

  // Print the header row.
  System.out.print("Doc\t");
  for (String columnName : columnNames) {
    System.out.print(columnName);
    System.out.print("\t");
  }
  System.out.println();

  // Print one row per document, decoding each dictionary id into its raw value.
  for (int i = 0; i < indexSegment.getSegmentMetadata().getTotalDocs(); i++) {
    System.out.print(i);
    System.out.print("\t");
    for (String columnName : columnNames) {
      FieldSpec.DataType columnType = metadata.getSchema().getFieldSpecFor(columnName).getDataType();
      BlockSingleValIterator itr = iterators.get(columnName);
      Integer encodedValue = itr.nextIntVal();
      Object value = dictionaries.get(columnName).get(encodedValue);
      System.out.print(value);
      System.out.print("\t");
    }
    System.out.println();
  }
  if (dumpStarTree) {
    System.out.println();
    File starTreeFile = new File(segmentDir, V1Constants.STAR_TREE_INDEX_FILE);
    StarTreeInterf tree = StarTreeSerDe.fromFile(starTreeFile, ReadMode.mmap);
    tree.printTree();
  }
}
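CmdLineParser here is args4j, so a conventional entry point simply forwards the raw arguments; this main method is an assumption and does not appear in the snippet:

// Hypothetical entry point: parse args and dump the segment.
public static void main(String[] args) throws Exception {
  new SegmentDumpTool().doMain(args);
}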