Examples with Dictionary - com.linkedin.pinot.core.segment.index.readers.Dictionary

Example 1 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class StarTreeJsonNode method build.

/**
   * Recursively copies the children of {@code indexNode} under {@code json}.
   *
   * Every non-ALL child is attached first, followed by the ALL child (if any). When the node count
   * of this subtree exceeds {@code MAX_CHILDREN}, only the ALL child and the non-ALL children with
   * the largest subtrees (at most {@code MAX_CHILDREN} of them) are kept under {@code json}.
   *
   * @param indexNode star-tree index node whose children are converted.
   * @param json JSON node that receives the converted children.
   * @return {@code indexNode.getNumChildren()} plus the sum of the values returned by the recursive
   *         calls on each child; 0 when {@code indexNode} has no children.
   */
private int build(StarTreeIndexNodeInterf indexNode, StarTreeJsonNode json) {
    Iterator<? extends StarTreeIndexNodeInterf> childrenIterator = indexNode.getChildrenIterator();
    if (!childrenIterator.hasNext()) {
        return 0;
    }
    int childDimensionId = indexNode.getChildDimensionName();
    String childDimensionName = dimensionNameToIndexMap.inverse().get(childDimensionId);
    Dictionary dictionary = dictionaries.get(childDimensionName);
    int totalChildNodes = indexNode.getNumChildren();
    // Orders pairs by descending count, so the bounded queue below evicts the smallest counts first.
    Comparator<Pair<String, Integer>> comparator = new Comparator<Pair<String, Integer>>() {

        @Override
        public int compare(Pair<String, Integer> o1, Pair<String, Integer> o2) {
            return -1 * Integer.compare(o1.getRight(), o2.getRight());
        }
    };
    MinMaxPriorityQueue<Pair<String, Integer>> queue = MinMaxPriorityQueue.orderedBy(comparator).maximumSize(MAX_CHILDREN).create();
    StarTreeJsonNode allNode = null;
    while (childrenIterator.hasNext()) {
        StarTreeIndexNodeInterf childIndexNode = childrenIterator.next();
        int childDimensionValueId = childIndexNode.getDimensionValue();
        String childDimensionValue = "ALL";
        if (childDimensionValueId != StarTreeIndexNodeInterf.ALL) {
            childDimensionValue = dictionary.get(childDimensionValueId).toString();
        }
        StarTreeJsonNode childJson = new StarTreeJsonNode(childDimensionValue);
        int childSubtreeNodes = build(childIndexNode, childJson);
        totalChildNodes += childSubtreeNodes;
        if (childDimensionValueId != StarTreeIndexNodeInterf.ALL) {
            json.addChild(childJson);
            // Rank the child by the size of its own subtree. Using the running total here would make
            // the ranking depend purely on iteration order (later children would always win).
            queue.add(ImmutablePair.of(childDimensionValue, childSubtreeNodes));
        } else {
            allNode = childJson;
        }
    }
    // Attach the ALL node last so it always comes after the regular children.
    if (allNode != null) {
        json.addChild(allNode);
    }
    if (totalChildNodes > MAX_CHILDREN) {
        // Collect the names that survive pruning: the queued top children plus "ALL".
        Iterator<Pair<String, Integer>> qIterator = queue.iterator();
        Set<String> topKDimensions = new HashSet<>();
        topKDimensions.add("ALL");
        while (qIterator.hasNext()) {
            topKDimensions.add(qIterator.next().getKey());
        }
        // Drop every child whose name did not make the cut.
        Iterator<StarTreeJsonNode> iterator = json.getChildren().iterator();
        while (iterator.hasNext()) {
            StarTreeJsonNode next = iterator.next();
            if (!topKDimensions.contains(next.getName())) {
                iterator.remove();
            }
        }
    }
    return totalChildNodes;
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) StarTreeIndexNodeInterf(com.linkedin.pinot.core.startree.StarTreeIndexNodeInterf) Comparator(java.util.Comparator) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) HashSet(java.util.HashSet)

Example 2 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class Projection method transformFromIdToValues.

/**
   * Rewrites the given result table in place, replacing each dictionary id with the value returned
   * by the dictionary of the cell's column. Cells holding an {@code Object[]} are translated
   * element by element into a new array; all other cells are treated as a single dictionary id.
   *
   * @param resultTable table whose cells hold dictionary ids; modified in place.
   * @param dictionaryMap column name to dictionary map used for the lookups.
   * @param addCountStar when true, the value 1 is appended to every row and the table is asked to
   *                     add its count(*) column.
   * @return the same {@code resultTable} instance that was passed in.
   */
public ResultTable transformFromIdToValues(ResultTable resultTable, Map<String, Dictionary> dictionaryMap, boolean addCountStar) {
    List<Pair> columns = resultTable.getColumnList();
    for (ResultTable.Row row : resultTable) {
        int columnIndex = 0;
        for (Object cell : row) {
            String columnName = (String) columns.get(columnIndex).getFirst();
            Dictionary columnDictionary = dictionaryMap.get(columnName);
            if (!(cell instanceof Object[])) {
                // Plain cell: a single dictionary id.
                int dictId = (int) cell;
                row.set(columnIndex, columnDictionary.get(dictId));
            } else {
                // Array cell: translate every id into a fresh array of values.
                Object[] dictIds = (Object[]) cell;
                Object[] values = new Object[dictIds.length];
                int position = 0;
                for (Object rawDictId : dictIds) {
                    values[position++] = columnDictionary.get((int) rawDictId);
                }
                row.set(columnIndex, values);
            }
            columnIndex++;
        }
    }
    if (!addCountStar) {
        return resultTable;
    }
    // Add additional column for count(*)
    for (ResultTable.Row row : resultTable) {
        row.add(1);
    }
    resultTable.addCountStarColumn();
    return resultTable;
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) Pair(com.linkedin.pinot.core.query.utils.Pair)

Example 3 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class SegmentInfoProvider method loadValuesForSingleValueDimension.

/**
   * Adds every value held by the dictionary of a single-value dimension to that column's
   * value set, creating and registering the set in the map when the column has none yet.
   *
   * @param indexSegment index segment that supplies the column's data source.
   * @param singleValueDimensionValuesMap map from single-value dimension column to its unique values;
   *                                      updated in place.
   * @param column single-value dimension name.
   */
private void loadValuesForSingleValueDimension(IndexSegment indexSegment, Map<String, Set<Object>> singleValueDimensionValuesMap, String column) {
    Dictionary dictionary = indexSegment.getDataSource(column).getDictionary();
    Set<Object> uniqueValues = singleValueDimensionValuesMap.get(column);
    if (uniqueValues == null) {
        // First time this column is seen: register an empty set for it.
        uniqueValues = new HashSet<>();
        singleValueDimensionValuesMap.put(column, uniqueValues);
    }
    // Walk the dictionary ids from 0 up to (but excluding) its length.
    for (int dictId = 0, numEntries = dictionary.length(); dictId < numEntries; dictId++) {
        uniqueValues.add(dictionary.get(dictId));
    }
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary)

Example 4 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class Selection method run.

/**
   * Looks up the dictionary of every selection column, notes whether a column named "*" is among
   * them, and returns the result of running a {@code Projection} built from that information.
   *
   * @return the result table produced by the projection.
   */
public ResultTable run() {
    Map<String, Dictionary> columnDictionaries = new HashMap<>();
    boolean addCountStar = false;
    for (Pair selectionColumn : _selectionColumns) {
        String columnName = (String) selectionColumn.getFirst();
        // Non-short-circuit OR: equals() is still evaluated for every column.
        addCountStar |= columnName.equals("*");
        columnDictionaries.put(columnName, _indexSegment.getDictionaryFor(columnName));
    }
    return new Projection(_indexSegment, _metadata, _filteredDocIds, _selectionColumns, columnDictionaries, addCountStar).run();
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) HashMap(java.util.HashMap) Pair(com.linkedin.pinot.core.query.utils.Pair)

Example 5 with Dictionary

use of com.linkedin.pinot.core.segment.index.readers.Dictionary in project pinot by linkedin.

the class DictionaryToRawIndexConverter method convertOneColumn.

/**
   * Helper method to perform conversion for the specific column.
   *
   * Reads each document's dictionary id from the column's forward index, looks up the value in the
   * dictionary and writes it to a raw index. Columns without a dictionary and multi-valued columns
   * are logged as errors and skipped. After a successful conversion the forward index is deleted.
   *
   * @param segment Input segment to convert
   * @param column Column to convert
   * @param newSegment Directory where raw index to be written
   * @throws IOException declared by the method; the raw index creator is closed before any
   *                     failure from the indexing loop propagates
   */
private void convertOneColumn(IndexSegment segment, String column, File newSegment) throws IOException {
    DataSource dataSource = segment.getDataSource(column);
    Dictionary dictionary = dataSource.getDictionary();
    if (dictionary == null) {
        LOGGER.error("Column '{}' does not have dictionary, cannot convert to raw index.", column);
        return;
    }
    DataSourceMetadata dataSourceMetadata = dataSource.getDataSourceMetadata();
    if (!dataSourceMetadata.isSingleValue()) {
        LOGGER.error("Cannot convert multi-valued columns '{}'", column);
        return;
    }
    int totalDocs = segment.getSegmentMetadata().getTotalDocs();
    BlockSingleValIterator bvIter = (BlockSingleValIterator) dataSource.getNextBlock().getBlockValueSet().iterator();
    FieldSpec.DataType dataType = dataSourceMetadata.getDataType();
    // Only STRING columns need the longest entry length; -1 is passed for every other type.
    int lengthOfLongestEntry = (dataType == FieldSpec.DataType.STRING) ? getLengthOfLongestEntry(bvIter, dictionary) : -1;
    SingleValueRawIndexCreator rawIndexCreator = SegmentColumnarIndexCreator.getRawIndexCreatorForColumn(newSegment, column, dataType, totalDocs, lengthOfLongestEntry);
    try {
        int docId = 0;
        // Rewind the iterator before the conversion pass.
        bvIter.reset();
        while (bvIter.hasNext()) {
            int dictId = bvIter.nextIntVal();
            Object value = dictionary.get(dictId);
            rawIndexCreator.index(docId++, value);
            if (docId % 1000000 == 0) {
                LOGGER.info("Converted {} records.", docId);
            }
        }
    } finally {
        // Always release the creator, even when the loop above fails part-way through.
        rawIndexCreator.close();
    }
    // Reached only when the conversion and close succeeded.
    deleteForwardIndex(newSegment.getParentFile(), column, dataSourceMetadata.isSorted());
}

Also used : Dictionary(com.linkedin.pinot.core.segment.index.readers.Dictionary) SingleValueRawIndexCreator(com.linkedin.pinot.core.segment.creator.SingleValueRawIndexCreator) DataSourceMetadata(com.linkedin.pinot.core.common.DataSourceMetadata) BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataSource(com.linkedin.pinot.core.common.DataSource)

Aggregations

Dictionary (com.linkedin.pinot.core.segment.index.readers.Dictionary)22 BlockSingleValIterator (com.linkedin.pinot.core.common.BlockSingleValIterator)4 DataSource (com.linkedin.pinot.core.common.DataSource)4 HashMap (java.util.HashMap)4 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)2 Pair (com.linkedin.pinot.core.query.utils.Pair)2 HyperLogLog (com.clearspring.analytics.stream.cardinality.HyperLogLog)1 SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata)1 Block (com.linkedin.pinot.core.common.Block)1 BlockMultiValIterator (com.linkedin.pinot.core.common.BlockMultiValIterator)1 BlockValIterator (com.linkedin.pinot.core.common.BlockValIterator)1 BlockValSet (com.linkedin.pinot.core.common.BlockValSet)1 DataSourceMetadata (com.linkedin.pinot.core.common.DataSourceMetadata)1 Predicate (com.linkedin.pinot.core.common.Predicate)1 GenericRow (com.linkedin.pinot.core.data.GenericRow)1 IndexSegment (com.linkedin.pinot.core.indexsegment.IndexSegment)1 PredicateEvaluator (com.linkedin.pinot.core.operator.filter.predicate.PredicateEvaluator)1 SingleValueRawIndexCreator (com.linkedin.pinot.core.segment.creator.SingleValueRawIndexCreator)1 SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl)1 DoubleDictionary (com.linkedin.pinot.core.segment.index.readers.DoubleDictionary)1