Search in sources :

Example 1 with MultiValueUnsortedForwardIndexCreator

use of com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator in project pinot by linkedin.

In class SegmentColumnarIndexCreator, method init:

/**
 * Prepares this creator for writing a segment into {@code outDir}.
 *
 * <p>Resets the per-segment state, creates the output directory, builds a dictionary for every
 * column that requires one, and registers a forward index creator per column plus an inverted
 * index creator for each configured inverted-index column that is present in the schema.
 *
 * @param segmentCreationSpec segment generator configuration (padding character, raw index
 *        columns, inverted index columns).
 * @param segmentIndexCreationInfo segment-level document and error counters.
 * @param indexCreationInfoMap per-column index creation information, keyed by column name.
 * @param schema schema providing the field specs of the columns.
 * @param outDir segment output directory; must not exist yet.
 * @throws RuntimeException if the output directory already exists or cannot be created, or if a
 *         raw index is requested for a multi-valued column.
 * @throws Exception if any dictionary or index creator fails to initialize.
 */
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo, Map<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir) throws Exception {
    docIdCounter = 0;
    config = segmentCreationSpec;
    this.indexCreationInfoMap = indexCreationInfoMap;
    dictionaryCreatorMap = new HashMap<String, SegmentDictionaryCreator>();
    forwardIndexCreatorMap = new HashMap<String, ForwardIndexCreator>();
    invertedIndexCreatorMap = new HashMap<String, InvertedIndexCreator>();
    file = outDir;
    // Check that the output directory does not exist
    if (file.exists()) {
        throw new RuntimeException("Segment output directory " + file.getAbsolutePath() + " already exists.");
    }
    // Fail fast here instead of surfacing an unrelated I/O error when the first index file is written.
    if (!file.mkdir()) {
        throw new RuntimeException("Failed to create segment output directory " + file.getAbsolutePath());
    }
    this.schema = schema;
    this.totalDocs = segmentIndexCreationInfo.getTotalDocs();
    this.totalAggDocs = segmentIndexCreationInfo.getTotalAggDocs();
    this.totalRawDocs = segmentIndexCreationInfo.getTotalRawDocs();
    this.totalErrors = segmentIndexCreationInfo.getTotalErrors();
    this.totalNulls = segmentIndexCreationInfo.getTotalNulls();
    this.totalConversions = segmentIndexCreationInfo.getTotalConversions();
    this.totalNullCols = segmentIndexCreationInfo.getTotalNullCols();
    this.paddingCharacter = segmentCreationSpec.getPaddingCharacter();
    // Instantiate a dictionary creator for every schema column that needs a dictionary
    for (final FieldSpec spec : schema.getAllFieldSpecs()) {
        String column = spec.getName();
        final ColumnIndexCreationInfo info = indexCreationInfoMap.get(column);
        if (createDictionaryForColumn(info, config, spec)) {
            dictionaryCreatorMap.put(column, new SegmentDictionaryCreator(info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file, paddingCharacter));
        }
    }
    // For each column, build its dictionary (if any) and initialize its forward index creator
    for (final Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
        final String column = entry.getKey();
        ColumnIndexCreationInfo indexCreationInfo = entry.getValue();
        // Single-element array used as an in/out parameter: build() may update the sorted flag.
        boolean[] isSorted = new boolean[1];
        isSorted[0] = indexCreationInfo.isSorted();
        SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
        if (dictionaryCreator != null) {
            dictionaryCreator.build(isSorted);
            indexCreationInfo.setSorted(isSorted[0]);
            dictionaryCache.put(column, new HashMap<Object, Object>());
        }
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        int maxLength = indexCreationInfo.getLegnthOfLongestEntry();
        boolean buildRawIndex = config.getRawIndexCreationColumns().contains(column);
        FieldSpec fieldSpec = schema.getFieldSpecFor(column);
        if (fieldSpec.isSingleValueField()) {
            // Raw indexes store actual values, instead of dictionary ids.
            if (buildRawIndex) {
                forwardIndexCreatorMap.put(column, getRawIndexCreatorForColumn(file, column, fieldSpec.getDataType(), totalDocs, maxLength));
            } else if (indexCreationInfo.isSorted()) {
                forwardIndexCreatorMap.put(column, new SingleValueSortedForwardIndexCreator(file, uniqueValueCount, fieldSpec));
            } else {
                forwardIndexCreatorMap.put(column, new SingleValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
            }
        } else {
            if (buildRawIndex) {
                // TODO: Add support for multi-valued columns.
                throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
            }
            forwardIndexCreatorMap.put(column, new MultiValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
        }
    }
    // Initialize an inverted index creator for each configured column that exists in the schema
    for (String column : config.getInvertedIndexCreationColumns()) {
        if (!schema.hasColumn(column)) {
            LOGGER.warn("Skipping enabling index on column:{} since its missing in schema", column);
            continue;
        }
        ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        OffHeapBitmapInvertedIndexCreator invertedIndexCreator = new OffHeapBitmapInvertedIndexCreator(file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), schema.getFieldSpecFor(column));
        invertedIndexCreatorMap.put(column, invertedIndexCreator);
    }
}
Also used : SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.SingleValueForwardIndexCreator) SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator) ForwardIndexCreator(com.linkedin.pinot.core.segment.creator.ForwardIndexCreator) MultiValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.MultiValueForwardIndexCreator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) InvertedIndexCreator(com.linkedin.pinot.core.segment.creator.InvertedIndexCreator) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)

Example 2 with MultiValueUnsortedForwardIndexCreator

use of com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator in project pinot by linkedin.

In class BaseDefaultColumnHandler, method createColumnV1Indices:

/**
 * Helper method to create the V1 indices (dictionary and forward index) for a column.
 *
 * <p>The dictionary contains a single entry, the field's default null value, and every document
 * in the forward index points at that entry (dictionary id 0). The column metadata is then added
 * to the segment metadata properties.
 *
 * @param column column name.
 * @throws NullPointerException if the schema has no field spec for the column.
 * @throws IllegalStateException if the default null value does not match the column's data type.
 * @throws UnsupportedOperationException if the data type is not STRING, INT, LONG, FLOAT or DOUBLE.
 * @throws Exception if creating the dictionary or the forward index fails.
 */
protected void createColumnV1Indices(String column) throws Exception {
    FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
    Preconditions.checkNotNull(fieldSpec);
    // Generate column index creation information.
    int totalDocs = _segmentMetadata.getTotalDocs();
    int totalRawDocs = _segmentMetadata.getTotalRawDocs();
    int totalAggDocs = totalDocs - totalRawDocs;
    FieldSpec.DataType dataType = fieldSpec.getDataType();
    Object defaultValue = fieldSpec.getDefaultNullValue();
    boolean isSingleValue = fieldSpec.isSingleValueField();
    // Each multi-value document holds exactly one entry (the default value).
    int maxNumberOfMultiValueElements = isSingleValue ? 0 : 1;
    // Only set for STRING columns; stays 0 for the numeric types.
    int dictionaryElementSize = 0;
    // One-element array of the matching primitive/String type holding the default value.
    Object sortedArray;
    switch(dataType) {
        case STRING:
            Preconditions.checkState(defaultValue instanceof String);
            String stringDefaultValue = (String) defaultValue;
            // Length of the UTF-8 encoded byte array.
            dictionaryElementSize = stringDefaultValue.getBytes("UTF8").length;
            sortedArray = new String[] { stringDefaultValue };
            break;
        case INT:
            Preconditions.checkState(defaultValue instanceof Integer);
            sortedArray = new int[] { (Integer) defaultValue };
            break;
        case LONG:
            Preconditions.checkState(defaultValue instanceof Long);
            sortedArray = new long[] { (Long) defaultValue };
            break;
        case FLOAT:
            Preconditions.checkState(defaultValue instanceof Float);
            sortedArray = new float[] { (Float) defaultValue };
            break;
        case DOUBLE:
            Preconditions.checkState(defaultValue instanceof Double);
            sortedArray = new double[] { (Double) defaultValue };
            break;
        default:
            throw new UnsupportedOperationException("Unsupported data type: " + dataType + " for column: " + column);
    }
    ColumnIndexCreationInfo columnIndexCreationInfo = new ColumnIndexCreationInfo(
        true, // createDictionary
        defaultValue, // min
        defaultValue, // max
        sortedArray,
        ForwardIndexType.FIXED_BIT_COMPRESSED,
        InvertedIndexType.SORTED_INDEX,
        isSingleValue, // isSortedColumn
        false, // hasNulls
        totalDocs, // totalNumberOfEntries
        maxNumberOfMultiValueElements,
        -1, // Unused max length
        true, // isAutoGenerated
        defaultValue);
    // Create dictionary.
    // We will have only one value in the dictionary.
    SegmentDictionaryCreator segmentDictionaryCreator = new SegmentDictionaryCreator(
        false, // hasNulls
        sortedArray, fieldSpec, _indexDir, V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
    try {
        segmentDictionaryCreator.build(new boolean[] { true });
    } finally {
        // Always release the creator, even when build() fails.
        segmentDictionaryCreator.close();
    }
    // Create forward index.
    if (isSingleValue) {
        // Single-value column.
        SingleValueSortedForwardIndexCreator svFwdIndexCreator = new SingleValueSortedForwardIndexCreator(
            _indexDir,
            1, // cardinality
            fieldSpec);
        try {
            for (int docId = 0; docId < totalDocs; docId++) {
                svFwdIndexCreator.add(0 /* dictionaryId */, docId);
            }
        } finally {
            // Always release the creator, even when add() fails.
            svFwdIndexCreator.close();
        }
    } else {
        // Multi-value column.
        MultiValueUnsortedForwardIndexCreator mvFwdIndexCreator = new MultiValueUnsortedForwardIndexCreator(
            fieldSpec, _indexDir,
            1, // cardinality
            totalDocs, // numDocs
            totalDocs, // totalNumberOfValues
            false);
        try {
            // The same single-entry array is passed for every document.
            int[] dictionaryIds = { 0 };
            for (int docId = 0; docId < totalDocs; docId++) {
                mvFwdIndexCreator.index(docId, dictionaryIds);
            }
        } finally {
            // Always release the creator, even when index() fails.
            mvFwdIndexCreator.close();
        }
    }
    // Add the column metadata information to the metadata properties.
    // NOTE(review): the meaning of the trailing null argument is not visible here - confirm against addColumnMetadataInfo.
    SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, column, columnIndexCreationInfo, totalDocs, totalRawDocs, totalAggDocs, fieldSpec,
        true, // hasDictionary
        dictionaryElementSize,
        true, // hasInvertedIndex
        null);
}
Also used : SegmentDictionaryCreator(com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator) SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)

Aggregations

FieldSpec (com.linkedin.pinot.common.data.FieldSpec)2 ColumnIndexCreationInfo (com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo)2 MultiValueUnsortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)2 SingleValueSortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator)2 ForwardIndexCreator (com.linkedin.pinot.core.segment.creator.ForwardIndexCreator)1 InvertedIndexCreator (com.linkedin.pinot.core.segment.creator.InvertedIndexCreator)1 MultiValueForwardIndexCreator (com.linkedin.pinot.core.segment.creator.MultiValueForwardIndexCreator)1 SingleValueForwardIndexCreator (com.linkedin.pinot.core.segment.creator.SingleValueForwardIndexCreator)1 SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator)1 SingleValueUnsortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator)1 OffHeapBitmapInvertedIndexCreator (com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator)1