Search in sources :

Example 16 with DataSource

use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.

the class NoDictionaryGroupKeyGeneratorTest method setup.

/**
 * Prepares the shared test fixtures: builds the segment, loads it with
 * {@code ReadMode.heap}, and caches the data source of every column by name.
 *
 * @throws Exception propagated from building or loading the segment
 */
@BeforeClass
public void setup() throws Exception {
    buildSegment();

    // Load the segment that was just built from its directory.
    _indexSegment = Loaders.IndexSegment.load(new File(SEGMENT_DIR_NAME, SEGMENT_NAME), ReadMode.heap);

    // Map each column name to its data source.
    _dataSourceMap = new HashMap<>();
    for (String columnName : _indexSegment.getColumnNames()) {
        _dataSourceMap.put(columnName, _indexSegment.getDataSource(columnName));
    }
}
Also used : File(java.io.File) DataSource(com.linkedin.pinot.core.common.DataSource) BeforeClass(org.testng.annotations.BeforeClass)

Example 17 with DataSource

use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.

the class FilterPlanNode method constructPhysicalOperator.

/**
   * Recursively translates a filter query tree into a tree of physical filter operators.
   *
   * <p>A {@code null} tree yields an operator matching the entire segment. A node with
   * children is delegated to {@code buildNonLeafOperator} after its children have been
   * converted. A leaf node is mapped to a sorted-inverted-index, bitmap or scan based
   * operator depending on the metadata of the column's data source. Whatever was built is
   * replaced by an {@code EmptyFilterOperator} when it reports an empty result.
   *
   * @param filterQueryTree Root of the filter query (sub)tree, may be {@code null}
   * @param segment Index segment the operators will run against
   * @param optimizeAlwaysFalse Whether children reporting an empty result are counted, and
   *        whether 'AND' nodes stop converting children at the first such child
   * @return Filter operator for the given tree
   */
@VisibleForTesting
public static BaseFilterOperator constructPhysicalOperator(FilterQueryTree filterQueryTree, IndexSegment segment, boolean optimizeAlwaysFalse) {
    // No filter at all: match every raw document of the segment.
    if (filterQueryTree == null) {
        return new MatchEntireSegmentOperator(segment.getSegmentMetadata().getTotalRawDocs());
    }

    final List<FilterQueryTree> children = filterQueryTree.getChildren();
    final FilterOperator operatorType = filterQueryTree.getOperator();
    final boolean hasChildren = (children != null) && !children.isEmpty();

    BaseFilterOperator result;
    if (hasChildren) {
        final int totalChildren = children.size();
        int alwaysFalseChildren = 0;
        final List<BaseFilterOperator> childOperators = new ArrayList<>();
        for (FilterQueryTree child : children) {
            final BaseFilterOperator childOperator = constructPhysicalOperator(child, segment, optimizeAlwaysFalse);
            final boolean childAlwaysFalse = optimizeAlwaysFalse && childOperator.isResultEmpty();
            if (childAlwaysFalse) {
                alwaysFalseChildren++;
                // One always-false child is enough to decide an 'AND'; stop converting
                // the remaining children (the current child is intentionally not added).
                if (operatorType == FilterOperator.AND) {
                    break;
                }
            }
            childOperators.add(childOperator);
        }
        result = buildNonLeafOperator(operatorType, childOperators, alwaysFalseChildren, totalChildren, optimizeAlwaysFalse);
    } else {
        final String columnName = filterQueryTree.getColumn();
        final Predicate predicate = Predicate.newPredicate(filterQueryTree);
        final DataSource dataSource = segment.getDataSource(columnName);
        final DataSourceMetadata metadata = dataSource.getDataSourceMetadata();

        final int firstDocId = 0;
        // The end doc id is inclusive.
        final int lastDocId = segment.getSegmentMetadata().getTotalRawDocs() - 1;

        // Range evaluation based on the inverted index is inefficient, so the inverted
        // index is only used for non-range predicates.
        final boolean useInvertedIndex = metadata.hasInvertedIndex() && !operatorType.equals(FilterOperator.RANGE);
        if (!useInvertedIndex) {
            result = new ScanBasedFilterOperator(predicate, dataSource, firstDocId, lastDocId);
        } else if (metadata.isSingleValue() && metadata.isSorted()) {
            // Sorted single-value column: use the sorted inverted index implementation.
            result = new SortedInvertedIndexBasedFilterOperator(predicate, dataSource, firstDocId, lastDocId);
        } else {
            result = new BitmapBasedFilterOperator(predicate, dataSource, firstDocId, lastDocId);
        }
    }

    // An operator that can never match is replaced by the dedicated empty operator.
    return result.isResultEmpty() ? new EmptyFilterOperator() : result;
}
Also used : FilterQueryTree(com.linkedin.pinot.common.utils.request.FilterQueryTree) ArrayList(java.util.ArrayList) ScanBasedFilterOperator(com.linkedin.pinot.core.operator.filter.ScanBasedFilterOperator) Predicate(com.linkedin.pinot.core.common.Predicate) DataSource(com.linkedin.pinot.core.common.DataSource) MatchEntireSegmentOperator(com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator) BaseFilterOperator(com.linkedin.pinot.core.operator.filter.BaseFilterOperator) BitmapBasedFilterOperator(com.linkedin.pinot.core.operator.filter.BitmapBasedFilterOperator) ScanBasedFilterOperator(com.linkedin.pinot.core.operator.filter.ScanBasedFilterOperator) EmptyFilterOperator(com.linkedin.pinot.core.operator.filter.EmptyFilterOperator) SortedInvertedIndexBasedFilterOperator(com.linkedin.pinot.core.operator.filter.SortedInvertedIndexBasedFilterOperator) FilterOperator(com.linkedin.pinot.common.request.FilterOperator) BaseFilterOperator(com.linkedin.pinot.core.operator.filter.BaseFilterOperator) DataSourceMetadata(com.linkedin.pinot.core.common.DataSourceMetadata) BitmapBasedFilterOperator(com.linkedin.pinot.core.operator.filter.BitmapBasedFilterOperator) EmptyFilterOperator(com.linkedin.pinot.core.operator.filter.EmptyFilterOperator) SortedInvertedIndexBasedFilterOperator(com.linkedin.pinot.core.operator.filter.SortedInvertedIndexBasedFilterOperator) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 18 with DataSource

use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.

the class RealtimeSegmentImpl method getDataSource.

/**
 * Looks up the data source of the given column and sets the given predicate on it.
 *
 * @param columnName name of the column whose data source is requested
 * @param p predicate to set on the data source
 * @return the column's data source with the predicate applied
 */
public DataSource getDataSource(String columnName, Predicate p) {
    final DataSource columnDataSource = getDataSource(columnName);
    columnDataSource.setPredicate(p);
    return columnDataSource;
}
Also used : DataSource(com.linkedin.pinot.core.common.DataSource) RealtimeColumnDataSource(com.linkedin.pinot.core.realtime.impl.datasource.RealtimeColumnDataSource)

Example 19 with DataSource

use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.

the class IndexSegmentImpl method iterator.

/**
 * Returns an iterator producing one {@code GenericRow} per document id in the half-open
 * range {@code [startDocId, endDocId)}.
 *
 * <p>For every single-value column the next dictionary id is read from the column's value
 * iterator and resolved through the column's dictionary. Multi-value columns are collected
 * but not yet emitted (see the TODO below).
 *
 * <p>NOTE(review): the per-column value iterators are obtained fresh and are never
 * positioned at {@code startDocId} here; if they start at document 0, rows for a non-zero
 * {@code startDocId} would come from the wrong documents — confirm against the
 * {@code BlockValIterator} API.
 *
 * @param startDocId first document id to return (inclusive)
 * @param endDocId document id at which iteration stops (exclusive)
 * @return a read-only iterator; {@code remove()} is unsupported
 */
public Iterator<GenericRow> iterator(final int startDocId, final int endDocId) {
    final Map<String, BlockSingleValIterator> singleValIteratorMap = new HashMap<>();
    final Map<String, BlockMultiValIterator> multiValIteratorMap = new HashMap<>();
    for (String column : getColumnNames()) {
        DataSource dataSource = getDataSource(column);
        BlockValIterator iterator = dataSource.getNextBlock().getBlockValueSet().iterator();
        if (dataSource.getDataSourceMetadata().isSingleValue()) {
            singleValIteratorMap.put(column, (BlockSingleValIterator) iterator);
        } else {
            multiValIteratorMap.put(column, (BlockMultiValIterator) iterator);
        }
    }
    return new Iterator<GenericRow>() {

        int docId = startDocId;

        @Override
        public boolean hasNext() {
            return docId < endDocId;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        @Override
        public GenericRow next() {
            // Honor the Iterator contract instead of reading past the requested range.
            if (!hasNext()) {
                throw new java.util.NoSuchElementException("No more documents: docId=" + docId + ", endDocId=" + endDocId);
            }
            Map<String, Object> map = new HashMap<>();
            for (Map.Entry<String, BlockSingleValIterator> entry : singleValIteratorMap.entrySet()) {
                String column = entry.getKey();
                int dictId = entry.getValue().nextIntVal();
                Dictionary dictionary = getDictionaryFor(column);
                map.put(column, dictionary.get(dictId));
            }
            //TODO:handle multi value (iterators are available in multiValIteratorMap)
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            docId++;
            return genericRow;
        }
    };
}
Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) HashMap(java.util.HashMap) DataSource(com.linkedin.pinot.core.common.DataSource) GenericRow(com.linkedin.pinot.core.data.GenericRow) BlockMultiValIterator(com.linkedin.pinot.core.common.BlockMultiValIterator) BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) Iterator(java.util.Iterator) BlockMultiValIterator(com.linkedin.pinot.core.common.BlockMultiValIterator) BlockValIterator(com.linkedin.pinot.core.common.BlockValIterator) BlockValIterator(com.linkedin.pinot.core.common.BlockValIterator)

Example 20 with DataSource

use of com.linkedin.pinot.core.common.DataSource in project pinot by linkedin.

the class SegmentDumpTool method doMain.

/**
 * Parses the command line arguments, loads the segment at {@code segmentPath} and prints
 * it to standard output as a tab-separated table: a header row, then one row per document
 * with the document index followed by the dictionary-decoded value of every selected column.
 * When {@code dumpStarTree} is set, the star tree read from the segment directory is
 * printed afterwards.
 *
 * <p>If no columns were selected, all schema columns are dumped in sorted name order.
 * Every dumped column is read through a {@code BlockSingleValIterator}.
 *
 * @param args command line arguments, parsed into this object's fields
 * @throws Exception propagated from argument parsing, segment loading or star tree reading
 */
public void doMain(String[] args) throws Exception {
    CmdLineParser parser = new CmdLineParser(this);
    parser.parseArgument(args);
    File segmentDir = new File(segmentPath);
    SegmentMetadata metadata = new SegmentMetadataImpl(segmentDir);
    // All columns by default
    if (columnNames == null) {
        columnNames = new ArrayList<String>(metadata.getSchema().getColumnNames());
        Collections.sort(columnNames);
    }
    IndexSegment indexSegment = Loaders.IndexSegment.load(segmentDir, ReadMode.mmap);
    Map<String, Dictionary> dictionaries = new HashMap<String, Dictionary>();
    Map<String, BlockSingleValIterator> iterators = new HashMap<String, BlockSingleValIterator>();
    for (String columnName : columnNames) {
        DataSource dataSource = indexSegment.getDataSource(columnName);
        dataSource.open();
        Block block = dataSource.nextBlock();
        BlockValSet blockValSet = block.getBlockValueSet();
        BlockSingleValIterator itr = (BlockSingleValIterator) blockValSet.iterator();
        iterators.put(columnName, itr);
        dictionaries.put(columnName, dataSource.getDictionary());
    }
    // Header row.
    System.out.print("Doc\t");
    for (String columnName : columnNames) {
        System.out.print(columnName);
        System.out.print("\t");
    }
    System.out.println();
    // The document count does not change while dumping, so look it up once.
    final int totalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
    for (int i = 0; i < totalDocs; i++) {
        System.out.print(i);
        System.out.print("\t");
        for (String columnName : columnNames) {
            // Each iterator advances by one value per document; decode it via the dictionary.
            int encodedValue = iterators.get(columnName).nextIntVal();
            Object value = dictionaries.get(columnName).get(encodedValue);
            System.out.print(value);
            System.out.print("\t");
        }
        System.out.println();
    }
    if (dumpStarTree) {
        System.out.println();
        File starTreeFile = new File(segmentDir, V1Constants.STAR_TREE_INDEX_FILE);
        StarTreeInterf tree = StarTreeSerDe.fromFile(starTreeFile, ReadMode.mmap);
        tree.printTree();
    }
}
Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) CmdLineParser(org.kohsuke.args4j.CmdLineParser) HashMap(java.util.HashMap) IndexSegment(com.linkedin.pinot.core.indexsegment.IndexSegment) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataSource(com.linkedin.pinot.core.common.DataSource) SegmentMetadata(com.linkedin.pinot.common.segment.SegmentMetadata) BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) Block(com.linkedin.pinot.core.common.Block) BlockValSet(com.linkedin.pinot.core.common.BlockValSet) StarTreeInterf(com.linkedin.pinot.core.startree.StarTreeInterf) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) File(java.io.File)

Aggregations

DataSource (com.linkedin.pinot.core.common.DataSource)28 Block (com.linkedin.pinot.core.common.Block)21 Test (org.testng.annotations.Test)15 BlockSingleValIterator (com.linkedin.pinot.core.common.BlockSingleValIterator)14 Predicate (com.linkedin.pinot.core.common.Predicate)13 ArrayList (java.util.ArrayList)12 BlockDocIdIterator (com.linkedin.pinot.core.common.BlockDocIdIterator)11 EqPredicate (com.linkedin.pinot.core.common.predicate.EqPredicate)11 RealtimeSegmentImplTest (com.linkedin.pinot.core.realtime.impl.kafka.RealtimeSegmentImplTest)10 NEqPredicate (com.linkedin.pinot.core.common.predicate.NEqPredicate)8 RangePredicate (com.linkedin.pinot.core.common.predicate.RangePredicate)8 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)6 BlockMetadata (com.linkedin.pinot.core.common.BlockMetadata)6 BlockValSet (com.linkedin.pinot.core.common.BlockValSet)5 BitmapBasedFilterOperator (com.linkedin.pinot.core.operator.filter.BitmapBasedFilterOperator)5 ScanBasedFilterOperator (com.linkedin.pinot.core.operator.filter.ScanBasedFilterOperator)5 IndexSegmentImpl (com.linkedin.pinot.core.segment.index.IndexSegmentImpl)5 Dictionary (com.linkedin.pinot.core.segment.index.readers.Dictionary)4 HashMap (java.util.HashMap)4 BlockDocIdSet (com.linkedin.pinot.core.common.BlockDocIdSet)3