Search in sources :

Example 1 with InvertedIndexCreator

Use of com.linkedin.pinot.core.segment.creator.InvertedIndexCreator in the project pinot by linkedin.

The method init of the class SegmentColumnarIndexCreator.

/**
 * Prepares this creator for writing a segment into {@code outDir}.
 *
 * <p>The method resets the document counter, stores the supplied configuration, schema and
 * segment-level totals, creates the output directory, and then registers:
 * <ul>
 *   <li>a dictionary creator for every schema field for which
 *       {@code createDictionaryForColumn} returns {@code true};</li>
 *   <li>a forward index creator for every column in {@code indexCreationInfoMap};</li>
 *   <li>an inverted index creator for every configured inverted-index column that is
 *       present in the schema.</li>
 * </ul>
 *
 * @param segmentCreationSpec segment generation configuration (padding character, raw index
 *        columns, inverted index columns)
 * @param segmentIndexCreationInfo segment-level totals copied into this creator
 * @param indexCreationInfoMap per-column index creation info, keyed by column name
 * @param schema schema describing the columns of the segment
 * @param outDir directory to create and write the segment into; must not exist yet
 * @throws RuntimeException if {@code outDir} already exists or cannot be created, or if a raw
 *         index is requested for a multi-valued column
 * @throws Exception if any of the underlying dictionary or index creators fails
 */
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo, Map<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir) throws Exception {
    docIdCounter = 0;
    config = segmentCreationSpec;
    this.indexCreationInfoMap = indexCreationInfoMap;
    dictionaryCreatorMap = new HashMap<String, SegmentDictionaryCreator>();
    forwardIndexCreatorMap = new HashMap<String, ForwardIndexCreator>();
    invertedIndexCreatorMap = new HashMap<String, InvertedIndexCreator>();
    file = outDir;
    // Check that the output directory does not exist
    if (file.exists()) {
        throw new RuntimeException("Segment output directory " + file.getAbsolutePath() + " already exists.");
    }
    // Fail fast when the directory cannot be created instead of letting an index creator
    // hit a less descriptive I/O error later on.
    if (!file.mkdir()) {
        throw new RuntimeException("Failed to create segment output directory " + file.getAbsolutePath());
    }
    this.schema = schema;
    this.totalDocs = segmentIndexCreationInfo.getTotalDocs();
    this.totalAggDocs = segmentIndexCreationInfo.getTotalAggDocs();
    this.totalRawDocs = segmentIndexCreationInfo.getTotalRawDocs();
    this.totalErrors = segmentIndexCreationInfo.getTotalErrors();
    this.totalNulls = segmentIndexCreationInfo.getTotalNulls();
    this.totalConversions = segmentIndexCreationInfo.getTotalConversions();
    this.totalNullCols = segmentIndexCreationInfo.getTotalNullCols();
    this.paddingCharacter = segmentCreationSpec.getPaddingCharacter();
    // Register a dictionary creator for every field that needs one
    for (final FieldSpec spec : schema.getAllFieldSpecs()) {
        String column = spec.getName();
        final ColumnIndexCreationInfo info = indexCreationInfoMap.get(column);
        if (createDictionaryForColumn(info, config, spec)) {
            dictionaryCreatorMap.put(column, new SegmentDictionaryCreator(info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file, paddingCharacter));
        }
    }
    // For each column, build its dictionary (if any) and initialize its forward index
    for (final Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
        final String column = entry.getKey();
        ColumnIndexCreationInfo indexCreationInfo = entry.getValue();
        // Single-element array acts as an in/out parameter: build() may update the sorted flag.
        boolean[] isSorted = new boolean[] { indexCreationInfo.isSorted() };
        SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
        if (dictionaryCreator != null) {
            dictionaryCreator.build(isSorted);
            indexCreationInfo.setSorted(isSorted[0]);
            dictionaryCache.put(column, new HashMap<Object, Object>());
        }
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        int maxLength = indexCreationInfo.getLegnthOfLongestEntry();
        boolean buildRawIndex = config.getRawIndexCreationColumns().contains(column);
        FieldSpec fieldSpec = schema.getFieldSpecFor(column);
        if (fieldSpec.isSingleValueField()) {
            // Raw indexes store actual values, instead of dictionary ids.
            if (buildRawIndex) {
                forwardIndexCreatorMap.put(column, getRawIndexCreatorForColumn(file, column, fieldSpec.getDataType(), totalDocs, maxLength));
            } else if (indexCreationInfo.isSorted()) {
                forwardIndexCreatorMap.put(column, new SingleValueSortedForwardIndexCreator(file, uniqueValueCount, fieldSpec));
            } else {
                forwardIndexCreatorMap.put(column, new SingleValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
            }
        } else {
            if (buildRawIndex) {
                // TODO: Add support for multi-valued columns.
                throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
            }
            forwardIndexCreatorMap.put(column, new MultiValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
        }
    }
    // Initialize an inverted index for every configured column that exists in the schema
    for (String column : config.getInvertedIndexCreationColumns()) {
        if (!schema.hasColumn(column)) {
            LOGGER.warn("Skipping enabling index on column:{} since its missing in schema", column);
            continue;
        }
        ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        OffHeapBitmapInvertedIndexCreator invertedIndexCreator = new OffHeapBitmapInvertedIndexCreator(file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), schema.getFieldSpecFor(column));
        invertedIndexCreatorMap.put(column, invertedIndexCreator);
    }
}
Also used : SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.SingleValueForwardIndexCreator) SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator) ForwardIndexCreator(com.linkedin.pinot.core.segment.creator.ForwardIndexCreator) MultiValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.MultiValueForwardIndexCreator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) InvertedIndexCreator(com.linkedin.pinot.core.segment.creator.InvertedIndexCreator) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)

Aggregations

FieldSpec (com.linkedin.pinot.common.data.FieldSpec)1 ColumnIndexCreationInfo (com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo)1 ForwardIndexCreator (com.linkedin.pinot.core.segment.creator.ForwardIndexCreator)1 InvertedIndexCreator (com.linkedin.pinot.core.segment.creator.InvertedIndexCreator)1 MultiValueForwardIndexCreator (com.linkedin.pinot.core.segment.creator.MultiValueForwardIndexCreator)1 SingleValueForwardIndexCreator (com.linkedin.pinot.core.segment.creator.SingleValueForwardIndexCreator)1 MultiValueUnsortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)1 SingleValueSortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator)1 SingleValueUnsortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator)1 OffHeapBitmapInvertedIndexCreator (com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator)1