Examples with Dictionary - com.linkedin.pinot.core.segment.index.readers.Dictionary

Example 11 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class PinotSegmentRecordReader method next.

/**
 * Populates the given row with one field per column for the current document, then advances
 * {@code docNumber} to the next document.
 *
 * <p>For every column the dictionary id(s) stored at {@code docNumber} are read and translated
 * through that column's dictionary. Single-value columns are written as a single value;
 * multi-value columns are written as an {@code Object[]}.
 *
 * @param row the row to fill in via {@code putField}
 * @return the same {@code row} instance that was passed in
 */
@Override
public GenericRow next(GenericRow row) {
    for (String column : columns) {
        Dictionary dictionary = pinotDictionaryBufferMap.get(column);

        if (!isSingleValueMap.get(column)) {
            // Multi-value column: the reader fills the per-column id array and its return value
            // is used as the number of entries to translate.
            int[] dictIdBuffer = multiValueArrayMap.get(column);
            int valueCount = multiValueReaderMap.get(column).getIntArray(docNumber, dictIdBuffer);
            Object[] values = new Object[valueCount];
            int index = 0;
            while (index < valueCount) {
                values[index] = dictionary.get(dictIdBuffer[index]);
                index++;
            }
            row.putField(column, values);
            continue;
        }

        // Single-value column: sorted and unsorted columns are served by different reader maps.
        int dictId;
        if (isSortedMap.get(column)) {
            dictId = singleValueSortedReaderMap.get(column).getInt(docNumber);
        } else {
            dictId = singleValueReaderMap.get(column).getInt(docNumber);
        }
        row.putField(column, dictionary.get(dictId));
    }
    docNumber++;
    return row;
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) IntDictionary(com.linkedin.pinot.core.segment.index.readers.IntDictionary) FloatDictionary(com.linkedin.pinot.core.segment.index.readers.FloatDictionary) LongDictionary(com.linkedin.pinot.core.segment.index.readers.LongDictionary) StringDictionary(com.linkedin.pinot.core.segment.index.readers.StringDictionary) DoubleDictionary(com.linkedin.pinot.core.segment.index.readers.DoubleDictionary)

Example 12 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class StarTreeIndexOperator method processFilterTree.

/**
 * Builds the predicate and its evaluator for one filter-tree node and records them under the
 * node's column.
 *
 * <p>When the evaluator reports that the predicate is always false, {@code emptyResult} is set
 * to {@code true}; the predicate is still recorded afterwards.
 *
 * @param childFilter the filter-tree node to process
 */
private void processFilterTree(FilterQueryTree childFilter) {
    final String column = childFilter.getColumn();

    // Only equality predicates are supported
    final Predicate predicate = Predicate.newPredicate(childFilter);
    final Dictionary columnDictionary = segment.getDataSource(column).getDictionary();
    final PredicateEvaluator evaluator = PredicateEvaluatorProvider.getPredicateFunctionFor(predicate, columnDictionary);

    // The evaluator says this predicate can never match, so flag the result as empty.
    if (evaluator.alwaysFalse()) {
        emptyResult = true;
    }

    // Store this predicate, we will have to apply it later
    final PredicateEntry entry = new PredicateEntry(predicate, evaluator);
    predicateColumns.add(column);
    predicatesMap.put(column, entry);
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) PredicateEvaluator(com.linkedin.pinot.core.operator.filter.predicate.PredicateEvaluator) Predicate(com.linkedin.pinot.core.common.Predicate)

Example 13 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class BaseHllStarTreeIndexTest method computeHll.

/**
 * Computes, per group, the HyperLogLog cardinality of each metric by scanning the given set of
 * doc-ids.
 *
 * <p>The group key is built by appending the string value of every group-by column for the
 * document, each followed by {@code "_"}. For each metric, the dictionary string value is
 * converted with {@code HllUtil.convertStringToHll} and merged into that group's HyperLogLog.
 *
 * @param segment segment whose data sources are read
 * @param docIdIterator iterator over the doc-ids to scan; scanning stops at {@code Constants.EOF}
 * @param metricNames names of the metric columns to aggregate
 * @param groupByColumns names of the columns that form the group key
 * @return map from group key to an array of cardinalities, where index {@code i} corresponds to
 *     {@code metricNames.get(i)}
 * @throws Exception declared by the signature; propagated from the underlying calls
 */
private Map<String, long[]> computeHll(IndexSegment segment, BlockDocIdIterator docIdIterator, List<String> metricNames, List<String> groupByColumns) throws Exception {
    final int numMetrics = metricNames.size();
    final int numGroupByColumns = groupByColumns.size();

    // Resolve the dictionary and value iterator of every metric column.
    Dictionary[] metricDictionaries = new Dictionary[numMetrics];
    BlockSingleValIterator[] metricValIterators = new BlockSingleValIterator[numMetrics];
    for (int m = 0; m < numMetrics; m++) {
        DataSource metricSource = segment.getDataSource(metricNames.get(m));
        metricDictionaries[m] = metricSource.getDictionary();
        metricValIterators[m] = (BlockSingleValIterator) metricSource.getNextBlock().getBlockValueSet().iterator();
    }

    // Same for every group-by column.
    Dictionary[] groupByDictionaries = new Dictionary[numGroupByColumns];
    BlockSingleValIterator[] groupByValIterators = new BlockSingleValIterator[numGroupByColumns];
    for (int g = 0; g < numGroupByColumns; g++) {
        DataSource groupBySource = segment.getDataSource(groupByColumns.get(g));
        groupByDictionaries[g] = groupBySource.getDictionary();
        groupByValIterators[g] = (BlockSingleValIterator) groupBySource.getNextBlock().getBlockValueSet().iterator();
    }

    Map<String, HyperLogLog[]> hllsByGroup = new HashMap<>();
    for (int docId = docIdIterator.next(); docId != Constants.EOF; docId = docIdIterator.next()) {
        // Build the group key for this document.
        StringBuilder keyBuilder = new StringBuilder();
        for (int g = 0; g < numGroupByColumns; g++) {
            BlockSingleValIterator groupByValues = groupByValIterators[g];
            groupByValues.skipTo(docId);
            int groupDictId = groupByValues.nextIntVal();
            keyBuilder.append(groupByDictionaries[g].getStringValue(groupDictId));
            keyBuilder.append("_");
        }
        String groupKey = keyBuilder.toString();

        // First document of a group: create one fresh HyperLogLog per metric.
        HyperLogLog[] groupHlls = hllsByGroup.get(groupKey);
        if (groupHlls == null) {
            groupHlls = new HyperLogLog[numMetrics];
            for (int m = 0; m < numMetrics; m++) {
                groupHlls[m] = new HyperLogLog(HLL_CONFIG.getHllLog2m());
            }
            hllsByGroup.put(groupKey, groupHlls);
        }

        // Merge this document's metric values into the group's accumulators.
        for (int m = 0; m < numMetrics; m++) {
            BlockSingleValIterator metricValues = metricValIterators[m];
            metricValues.skipTo(docId);
            int metricDictId = metricValues.nextIntVal();
            HyperLogLog docHll = HllUtil.convertStringToHll(metricDictionaries[m].getStringValue(metricDictId));
            groupHlls[m].addAll(docHll);
        }
    }

    // Reduce every accumulator to its cardinality.
    Map<String, long[]> cardinalitiesByGroup = new HashMap<>();
    for (Map.Entry<String, HyperLogLog[]> groupEntry : hllsByGroup.entrySet()) {
        HyperLogLog[] groupHlls = groupEntry.getValue();
        long[] cardinalities = new long[numMetrics];
        for (int m = 0; m < numMetrics; m++) {
            cardinalities[m] = groupHlls[m].cardinality();
        }
        cardinalitiesByGroup.put(groupEntry.getKey(), cardinalities);
    }
    return cardinalitiesByGroup;
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) HyperLogLog(com.clearspring.analytics.stream.cardinality.HyperLogLog)

Example 14 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class SegmentQueryProcessor method getMatchingDocIds.

/**
 * Narrows the given doc-ids to those whose value in {@code column} satisfies the filter.
 *
 * <p>The column's dictionary is looked up first, a predicate filter matching {@code filterType}
 * is built from it, and the filter is then handed to {@code evaluatePredicate}.
 *
 * @param inputDocIds doc-ids to evaluate
 * @param filterType kind of filter to apply
 * @param column column the filter applies to
 * @param value filter operand(s); EQUALITY and NOT use only the first element, the other
 *     supported types receive the whole list
 * @return the result of {@code evaluatePredicate} for the constructed filter
 * @throws UnsupportedOperationException for REGEX and any filter type not handled below
 */
List<Integer> getMatchingDocIds(List<Integer> inputDocIds, FilterOperator filterType, String column, List<String> value) {
    Dictionary dictionary = _indexSegment.getDictionaryFor(column);

    PredicateFilter filter;
    switch (filterType) {
        // Single-operand filters: only the first value is consulted.
        case EQUALITY:
            filter = new EqualsPredicateFilter(dictionary, value.get(0));
            break;
        case NOT:
            filter = new NotPredicateFilter(dictionary, value.get(0));
            break;

        // List-operand filters: the whole value list is passed through.
        case IN:
            filter = new InPredicateFilter(dictionary, value);
            break;
        case NOT_IN:
            filter = new NotInPredicateFilter(dictionary, value);
            break;
        case RANGE:
            filter = new RangePredicateFilter(dictionary, value);
            break;

        // REGEX is deliberately grouped with everything else that is not supported.
        case REGEX:
        default:
            throw new UnsupportedOperationException("Unsupported filterType:" + filterType);
    }

    return evaluatePredicate(inputDocIds, column, filter);
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary)

Example 15 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class IndexSegmentImpl method iterator.

/**
 * Returns an iterator that yields one {@link GenericRow} per document in the half-open range
 * {@code [startDocId, endDocId)}.
 *
 * <p>Each row contains the dictionary-decoded value of every single-value column. Multi-value
 * columns are not populated yet (see the TODO in {@code next()}).
 *
 * <p>Every value iterator is positioned on the current doc-id with {@code skipTo} before it is
 * read, so a non-zero {@code startDocId} yields the rows of that range rather than rows read
 * from wherever the freshly created value iterators start.
 *
 * @param startDocId first doc-id to return (inclusive)
 * @param endDocId doc-id at which iteration stops (exclusive)
 * @return an iterator over the rows in the range; {@code remove()} is unsupported and
 *     {@code next()} throws {@link java.util.NoSuchElementException} once the range is exhausted
 */
public Iterator<GenericRow> iterator(final int startDocId, final int endDocId) {
    final Map<String, BlockSingleValIterator> singleValIteratorMap = new HashMap<>();
    final Map<String, BlockMultiValIterator> multiValIteratorMap = new HashMap<>();
    for (String column : getColumnNames()) {
        DataSource dataSource = getDataSource(column);
        BlockValIterator iterator = dataSource.getNextBlock().getBlockValueSet().iterator();
        if (dataSource.getDataSourceMetadata().isSingleValue()) {
            singleValIteratorMap.put(column, (BlockSingleValIterator) iterator);
        } else {
            multiValIteratorMap.put(column, (BlockMultiValIterator) iterator);
        }
    }
    return new Iterator<GenericRow>() {

        int docId = startDocId;

        @Override
        public boolean hasNext() {
            return docId < endDocId;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        @Override
        public GenericRow next() {
            // Honor the Iterator contract instead of reading past the requested range.
            if (!hasNext()) {
                throw new java.util.NoSuchElementException("No more documents: docId " + docId + ", endDocId " + endDocId);
            }
            Map<String, Object> map = new HashMap<>();
            for (Map.Entry<String, BlockSingleValIterator> entry : singleValIteratorMap.entrySet()) {
                String column = entry.getKey();
                BlockSingleValIterator valIterator = entry.getValue();
                // Position on the current document before reading so that startDocId is respected.
                valIterator.skipTo(docId);
                int dictId = valIterator.nextIntVal();
                Dictionary dictionary = getDictionaryFor(column);
                map.put(column, dictionary.get(dictId));
            }
            // TODO: handle multi value (columns in multiValIteratorMap are currently left out of the row)
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            docId++;
            return genericRow;
        }
    };
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) HashMap(java.util.HashMap) DataSource(com.linkedin.pinot.core.common.DataSource) GenericRow(com.linkedin.pinot.core.data.GenericRow) BlockMultiValIterator(com.linkedin.pinot.core.common.BlockMultiValIterator) BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) Iterator(java.util.Iterator) BlockMultiValIterator(com.linkedin.pinot.core.common.BlockMultiValIterator) BlockValIterator(com.linkedin.pinot.core.common.BlockValIterator) BlockValIterator(com.linkedin.pinot.core.common.BlockValIterator)

Aggregations

Dictionary (com.linkedin.pinot.core.segment.index.readers.Dictionary)22 BlockSingleValIterator (com.linkedin.pinot.core.common.BlockSingleValIterator)4 DataSource (com.linkedin.pinot.core.common.DataSource)4 HashMap (java.util.HashMap)4 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)2 Pair (com.linkedin.pinot.core.query.utils.Pair)2 HyperLogLog (com.clearspring.analytics.stream.cardinality.HyperLogLog)1 SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata)1 Block (com.linkedin.pinot.core.common.Block)1 BlockMultiValIterator (com.linkedin.pinot.core.common.BlockMultiValIterator)1 BlockValIterator (com.linkedin.pinot.core.common.BlockValIterator)1 BlockValSet (com.linkedin.pinot.core.common.BlockValSet)1 DataSourceMetadata (com.linkedin.pinot.core.common.DataSourceMetadata)1 Predicate (com.linkedin.pinot.core.common.Predicate)1 GenericRow (com.linkedin.pinot.core.data.GenericRow)1 IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment)1 PredicateEvaluator (com.linkedin.pinot.core.operator.filter.predicate.PredicateEvaluator)1 SingleValueRawIndexCreator (com.linkedin.pinot.core.segment.creator.SingleValueRawIndexCreator)1 SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl)1 DoubleDictionary (com.linkedin.pinot.core.segment.index.readers.DoubleDictionary)1