Search in sources :

Example 1 with OffHeapBitmapInvertedIndexCreator

use of com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator in project pinot by linkedin.

the class InvertedIndexHandler method createInvertedIndexForColumn.

/**
 * Builds the bitmap inverted index for a single column from its forward index.
 *
 * <p>A marker file named {@code <column>.inv.inprogress} in the index directory brackets the
 * build: it is created before the build starts and removed once the build has finished. When
 * the marker is already present on entry, the previous attempt is treated as interrupted and
 * any leftover inverted index file is deleted before rebuilding. When there is no marker and
 * the segment already reports an inverted index for the column, nothing is done.
 *
 * @param columnMetadata metadata of the column to index
 * @throws IOException if the marker file cannot be created, or if reading the forward index
 *     or writing the inverted index fails
 */
private void createInvertedIndexForColumn(ColumnMetadata columnMetadata) throws IOException {
    final String column = columnMetadata.getColumnName();
    final File markerFile = new File(indexDir, column + ".inv.inprogress");
    final File invertedIndexFile = new File(indexDir, column + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);

    if (markerFile.exists()) {
        // The marker survived, so the previous run was interrupted: discard whatever it left.
        // For v1 and v2 this is the actual inverted index; for v3 it is the temporary one.
        FileUtils.deleteQuietly(invertedIndexFile);
    } else if (segmentWriter.hasIndexFor(column, ColumnIndexType.INVERTED_INDEX)) {
        // Nothing to do: the inverted index is already present.
        LOGGER.info("Found inverted index for segment: {}, column: {}", segmentName, column);
        return;
    } else {
        // Drop the marker before any work starts so an interruption can be detected later.
        FileUtils.touch(markerFile);
    }

    LOGGER.info("Creating new inverted index for segment: {}, column: {}", segmentName, column);
    final int totalDocs = columnMetadata.getTotalDocs();
    OffHeapBitmapInvertedIndexCreator creator = new OffHeapBitmapInvertedIndexCreator(
        indexDir,
        columnMetadata.getCardinality(),
        totalDocs,
        columnMetadata.getTotalNumberOfEntries(),
        columnMetadata.getFieldSpec());

    try (DataFileReader fwdIndex = getForwardIndexReader(columnMetadata, segmentWriter)) {
        if (!columnMetadata.isSingleValue()) {
            // Multi-value column: one reusable buffer sized for the widest document.
            SingleColumnMultiValueReader mvReader = (SingleColumnMultiValueReader) fwdIndex;
            int[] dictIdBuffer = new int[columnMetadata.getMaxNumberOfMultiValues()];
            for (int docId = 0; docId < totalDocs; docId++) {
                int valueCount = mvReader.getIntArray(docId, dictIdBuffer);
                creator.add(docId, dictIdBuffer, valueCount);
            }
        } else {
            // Single-value column: exactly one dictionary id per document.
            FixedBitSingleValueReader svReader = (FixedBitSingleValueReader) fwdIndex;
            for (int docId = 0; docId < totalDocs; docId++) {
                creator.add(docId, svReader.getInt(docId));
            }
        }
    }
    creator.seal();

    // For v3, write the generated inverted index file into the single file and remove it.
    if (segmentVersion == SegmentVersion.v3) {
        LoaderUtils.writeIndexToV3Format(segmentWriter, column, invertedIndexFile, ColumnIndexType.INVERTED_INDEX);
    }

    // The build completed, so the marker is no longer needed.
    FileUtils.deleteQuietly(markerFile);
    LOGGER.info("Created inverted index for segment: {}, column: {}", segmentName, column);
}
Also used : DataFileReader(com.linkedin.pinot.core.io.reader.DataFileReader) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) FixedBitSingleValueReader(com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader) File(java.io.File)

Example 2 with OffHeapBitmapInvertedIndexCreator

use of com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator in project pinot by linkedin.

the class SegmentColumnarIndexCreator method init.

/**
 * Prepares this creator for building a segment: resets counters, creates the output
 * directory, builds the dictionaries, and instantiates the forward and inverted index
 * creators for the configured columns.
 *
 * @param segmentCreationSpec segment generator configuration (padding character, raw-index
 *     columns, inverted-index columns)
 * @param segmentIndexCreationInfo segment-level statistics (document, error, null and
 *     conversion counts)
 * @param indexCreationInfoMap per-column index creation statistics, keyed by column name
 * @param schema schema describing every column of the segment
 * @param outDir directory the segment is written to; it must not exist yet
 * @throws RuntimeException if {@code outDir} already exists or cannot be created, or if a raw
 *     index is requested for a multi-valued column
 * @throws Exception if a dictionary or index creator fails to initialize
 */
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo, Map<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir) throws Exception {
    docIdCounter = 0;
    config = segmentCreationSpec;
    this.indexCreationInfoMap = indexCreationInfoMap;
    dictionaryCreatorMap = new HashMap<String, SegmentDictionaryCreator>();
    forwardIndexCreatorMap = new HashMap<String, ForwardIndexCreator>();
    invertedIndexCreatorMap = new HashMap<String, InvertedIndexCreator>();
    file = outDir;
    // Check that the output directory does not exist
    if (file.exists()) {
        throw new RuntimeException("Segment output directory " + file.getAbsolutePath() + " already exists.");
    }
    // Fail fast here instead of letting the index creators hit a missing directory later.
    if (!file.mkdir()) {
        throw new RuntimeException("Failed to create segment output directory " + file.getAbsolutePath());
    }
    this.schema = schema;
    this.totalDocs = segmentIndexCreationInfo.getTotalDocs();
    this.totalAggDocs = segmentIndexCreationInfo.getTotalAggDocs();
    this.totalRawDocs = segmentIndexCreationInfo.getTotalRawDocs();
    this.totalErrors = segmentIndexCreationInfo.getTotalErrors();
    this.totalNulls = segmentIndexCreationInfo.getTotalNulls();
    this.totalConversions = segmentIndexCreationInfo.getTotalConversions();
    this.totalNullCols = segmentIndexCreationInfo.getTotalNullCols();
    this.paddingCharacter = segmentCreationSpec.getPaddingCharacter();
    // Instantiate a dictionary creator for every column that needs a dictionary
    for (final FieldSpec spec : schema.getAllFieldSpecs()) {
        String column = spec.getName();
        final ColumnIndexCreationInfo info = indexCreationInfoMap.get(column);
        if (createDictionaryForColumn(info, config, spec)) {
            dictionaryCreatorMap.put(column, new SegmentDictionaryCreator(info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file, paddingCharacter));
        }
    }
    // For each column, build its dictionary (if any) and initialize its forward index creator
    for (final Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
        final String column = entry.getKey();
        ColumnIndexCreationInfo indexCreationInfo = entry.getValue();
        // Single-element array used as an in/out parameter: the value is read back after
        // build() and stored on the creation info.
        boolean[] isSorted = new boolean[1];
        isSorted[0] = indexCreationInfo.isSorted();
        SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
        if (dictionaryCreator != null) {
            dictionaryCreator.build(isSorted);
            indexCreationInfo.setSorted(isSorted[0]);
            dictionaryCache.put(column, new HashMap<Object, Object>());
        }
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        int maxLength = indexCreationInfo.getLegnthOfLongestEntry();
        boolean buildRawIndex = config.getRawIndexCreationColumns().contains(column);
        FieldSpec fieldSpec = schema.getFieldSpecFor(column);
        if (fieldSpec.isSingleValueField()) {
            // Raw indexes store actual values, instead of dictionary ids.
            if (buildRawIndex) {
                forwardIndexCreatorMap.put(column, getRawIndexCreatorForColumn(file, column, fieldSpec.getDataType(), totalDocs, maxLength));
            } else if (indexCreationInfo.isSorted()) {
                forwardIndexCreatorMap.put(column, new SingleValueSortedForwardIndexCreator(file, uniqueValueCount, fieldSpec));
            } else {
                forwardIndexCreatorMap.put(column, new SingleValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
            }
        } else {
            if (buildRawIndex) {
                // TODO: Add support for multi-valued columns.
                throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
            }
            forwardIndexCreatorMap.put(column, new MultiValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
        }
    }
    // Initialize an inverted index creator for every configured column present in the schema
    for (String column : config.getInvertedIndexCreationColumns()) {
        if (!schema.hasColumn(column)) {
            LOGGER.warn("Skipping enabling index on column:{} since its missing in schema", column);
            continue;
        }
        ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        OffHeapBitmapInvertedIndexCreator invertedIndexCreator = new OffHeapBitmapInvertedIndexCreator(file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), schema.getFieldSpecFor(column));
        invertedIndexCreatorMap.put(column, invertedIndexCreator);
    }
}
Also used : SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.SingleValueForwardIndexCreator) SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator) ForwardIndexCreator(com.linkedin.pinot.core.segment.creator.ForwardIndexCreator) MultiValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.MultiValueForwardIndexCreator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) InvertedIndexCreator(com.linkedin.pinot.core.segment.creator.InvertedIndexCreator) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)

Example 3 with OffHeapBitmapInvertedIndexCreator

use of com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator in project pinot by linkedin.

the class BitmapInvertedIndexCreatorTest method testSingleValue.

/**
 * Builds an inverted index for a random single-value INT column with both the off-heap and
 * the heap creator, validates each generated file against the expected posting lists, and
 * checks that the two creators produce identical files.
 *
 * @throws IOException if the index directories or index files cannot be created or read
 */
@Test
public void testSingleValue() throws IOException {
    boolean singleValue = true;
    String colName = "single_value_col";
    FieldSpec spec = new DimensionFieldSpec(colName, DataType.INT, singleValue);
    int numDocs = 20;
    int[] data = new int[numDocs];
    int cardinality = 10;
    File indexDirHeap = new File("/tmp/indexDirHeap");
    File indexDirOffHeap = new File("/tmp/indexDirOffHeap");
    try {
        // forceMkdir creates missing parents and throws on failure, so no extra mkdirs() is needed.
        FileUtils.forceMkdir(indexDirHeap);
        FileUtils.forceMkdir(indexDirOffHeap);
        File bitmapIndexFileOffHeap = new File(indexDirOffHeap, colName + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
        File bitmapIndexFileHeap = new File(indexDirHeap, colName + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
        // GENERATE RANDOM DATA SET
        Random r = new Random();
        Map<Integer, Set<Integer>> postingListMap = new HashMap<>();
        for (int i = 0; i < cardinality; i++) {
            postingListMap.put(i, new LinkedHashSet<Integer>());
        }
        for (int i = 0; i < numDocs; i++) {
            data[i] = r.nextInt(cardinality);
            LOGGER.debug("docId:" + i + "  dictId:" + data[i]);
            postingListMap.get(data[i]).add(i);
        }
        for (int i = 0; i < cardinality; i++) {
            LOGGER.debug("Posting list for " + i + " : " + postingListMap.get(i));
        }
        // GENERATE BITMAP USING OffHeapCreator and validate
        OffHeapBitmapInvertedIndexCreator offHeapCreator = new OffHeapBitmapInvertedIndexCreator(indexDirOffHeap, cardinality, numDocs, numDocs, spec);
        for (int i = 0; i < numDocs; i++) {
            offHeapCreator.add(i, data[i]);
        }
        offHeapCreator.seal();
        validate(colName, bitmapIndexFileOffHeap, cardinality, postingListMap);
        // GENERATE BITMAP USING HeapCreator and validate
        HeapBitmapInvertedIndexCreator heapCreator = new HeapBitmapInvertedIndexCreator(indexDirHeap, cardinality, numDocs, 0, spec);
        for (int i = 0; i < numDocs; i++) {
            heapCreator.add(i, data[i]);
        }
        heapCreator.seal();
        validate(colName, bitmapIndexFileHeap, cardinality, postingListMap);
        // Assert that the heap and off-heap creators produced files of the same size and content.
        Assert.assertEquals(bitmapIndexFileOffHeap.length(), bitmapIndexFileHeap.length());
        Assert.assertTrue(FileUtils.contentEquals(bitmapIndexFileOffHeap, bitmapIndexFileHeap));
    } finally {
        // Clean up even when an assertion fails so stale files do not leak into later runs.
        FileUtils.deleteQuietly(indexDirHeap);
        FileUtils.deleteQuietly(indexDirOffHeap);
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) HashMap(java.util.HashMap) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) HeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.HeapBitmapInvertedIndexCreator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Random(java.util.Random) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 4 with OffHeapBitmapInvertedIndexCreator

use of com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator in project pinot by linkedin.

the class BitmapInvertedIndexCreatorTest method testMultiValue.

/**
 * Builds an inverted index for a random multi-value INT column with both the off-heap and
 * the heap creator, validates each generated file against the expected posting lists, and
 * checks that the two creators produce identical files.
 *
 * @throws IOException if the index directories or index files cannot be created or read
 */
@Test
public void testMultiValue() throws IOException {
    boolean singleValue = false;
    String colName = "multi_value_col";
    FieldSpec spec = new DimensionFieldSpec(colName, DataType.INT, singleValue);
    int numDocs = 20;
    int[][] data = new int[numDocs][];
    int maxLength = 10;
    int cardinality = 10;
    File indexDirHeap = new File("/tmp/indexDirHeap");
    File indexDirOffHeap = new File("/tmp/indexDirOffHeap");
    try {
        // forceMkdir creates missing parents and throws on failure, so no extra mkdirs() is needed.
        FileUtils.forceMkdir(indexDirHeap);
        FileUtils.forceMkdir(indexDirOffHeap);
        File bitmapIndexFileOffHeap = new File(indexDirOffHeap, colName + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
        File bitmapIndexFileHeap = new File(indexDirHeap, colName + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
        // GENERATE RANDOM MULTI VALUE DATA SET
        Random r = new Random();
        Map<Integer, Set<Integer>> postingListMap = new HashMap<>();
        for (int i = 0; i < cardinality; i++) {
            postingListMap.put(i, new LinkedHashSet<Integer>());
        }
        int totalNumberOfEntries = 0;
        for (int docId = 0; docId < numDocs; docId++) {
            // A document may end up with zero values: nextInt(maxLength) is in [0, maxLength).
            int length = r.nextInt(maxLength);
            data[docId] = new int[length];
            totalNumberOfEntries += length;
            for (int j = 0; j < length; j++) {
                data[docId][j] = r.nextInt(cardinality);
                postingListMap.get(data[docId][j]).add(docId);
            }
            // Render the array contents; concatenating an int[] directly prints its identity hash.
            LOGGER.debug("docId:" + docId + "  dictId:" + java.util.Arrays.toString(data[docId]));
        }
        for (int i = 0; i < cardinality; i++) {
            LOGGER.debug("Posting list for " + i + " : " + postingListMap.get(i));
        }
        // GENERATE BITMAP USING OffHeapCreator and validate
        OffHeapBitmapInvertedIndexCreator offHeapCreator = new OffHeapBitmapInvertedIndexCreator(indexDirOffHeap, cardinality, numDocs, totalNumberOfEntries, spec);
        for (int i = 0; i < numDocs; i++) {
            offHeapCreator.add(i, data[i]);
        }
        offHeapCreator.seal();
        validate(colName, bitmapIndexFileOffHeap, cardinality, postingListMap);
        // GENERATE BITMAP USING HeapCreator and validate
        HeapBitmapInvertedIndexCreator heapCreator = new HeapBitmapInvertedIndexCreator(indexDirHeap, cardinality, numDocs, totalNumberOfEntries, spec);
        for (int i = 0; i < numDocs; i++) {
            heapCreator.add(i, data[i]);
        }
        heapCreator.seal();
        validate(colName, bitmapIndexFileHeap, cardinality, postingListMap);
        // Assert that the heap and off-heap creators produced files of the same size and content.
        Assert.assertEquals(bitmapIndexFileOffHeap.length(), bitmapIndexFileHeap.length());
        Assert.assertTrue(FileUtils.contentEquals(bitmapIndexFileOffHeap, bitmapIndexFileHeap));
    } finally {
        // Clean up even when an assertion fails so stale files do not leak into later runs.
        FileUtils.deleteQuietly(indexDirHeap);
        FileUtils.deleteQuietly(indexDirOffHeap);
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) HashMap(java.util.HashMap) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) HeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.HeapBitmapInvertedIndexCreator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Random(java.util.Random) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Aggregations

OffHeapBitmapInvertedIndexCreator (com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator)4 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)3 File (java.io.File)3 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)2 HeapBitmapInvertedIndexCreator (com.linkedin.pinot.core.segment.creator.impl.inv.HeapBitmapInvertedIndexCreator)2 HashMap (java.util.HashMap)2 LinkedHashSet (java.util.LinkedHashSet)2 Random (java.util.Random)2 Set (java.util.Set)2 Test (org.testng.annotations.Test)2 DataFileReader (com.linkedin.pinot.core.io.reader.DataFileReader)1 SingleColumnMultiValueReader (com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader)1 FixedBitSingleValueReader (com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader)1 ColumnIndexCreationInfo (com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo)1 ForwardIndexCreator (com.linkedin.pinot.core.segment.creator.ForwardIndexCreator)1 InvertedIndexCreator (com.linkedin.pinot.core.segment.creator.InvertedIndexCreator)1 MultiValueForwardIndexCreator (com.linkedin.pinot.core.segment.creator.MultiValueForwardIndexCreator)1 SingleValueForwardIndexCreator (com.linkedin.pinot.core.segment.creator.SingleValueForwardIndexCreator)1 MultiValueUnsortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)1 SingleValueSortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator)1