Search in sources :

Example 1 with SegmentDirectory

use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.

the class SegmentV1V2ToV3FormatConverter method copyIndexData.

/**
 * Copies the index data of a v1/v2 segment into a v3 segment directory.
 *
 * <p>For every column reported by the v2 metadata, the dictionary (when the
 * metadata says one exists) and the forward index are copied first. Existing
 * inverted indexes are copied in a second pass, followed by the star tree,
 * and finally the v3 writer is saved and closed.
 *
 * @param v2Directory directory of the source (v1/v2) segment
 * @param v2Metadata metadata of the source segment
 * @param v3Directory directory of the destination (v3) segment
 * @throws Exception if opening either segment or copying any index fails
 */
private void copyIndexData(File v2Directory, SegmentMetadataImpl v2Metadata, File v3Directory) throws Exception {
    final SegmentMetadataImpl targetMetadata = new SegmentMetadataImpl(v3Directory);
    // Resources are closed in reverse declaration order: writer, reader, v3 segment, v2 segment.
    try (SegmentDirectory sourceSegment = SegmentDirectory.createFromLocalFS(v2Directory, v2Metadata, ReadMode.mmap);
        SegmentDirectory targetSegment = SegmentDirectory.createFromLocalFS(v3Directory, targetMetadata, ReadMode.mmap);
        SegmentDirectory.Reader sourceReader = sourceSegment.createReader();
        SegmentDirectory.Writer targetWriter = targetSegment.createWriter()) {
        final Set<String> columnNames = v2Metadata.getAllColumns();

        // First pass: dictionary (if any) and forward index of each column.
        for (String columnName : columnNames) {
            LOGGER.debug("Converting segment: {} , column: {}", v2Directory, columnName);
            if (v2Metadata.hasDictionary(columnName)) {
                copyDictionary(sourceReader, targetWriter, columnName);
            }
            copyForwardIndex(sourceReader, targetWriter, columnName);
        }

        // Second pass: inverted indexes are intentionally stored at the end of the single file.
        for (String columnName : columnNames) {
            copyExistingInvertedIndex(sourceReader, targetWriter, columnName);
        }

        copyStarTree(sourceReader, targetWriter);
        targetWriter.saveAndClose();
    }
}
Also used : SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory)

Example 2 with SegmentDirectory

use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.

the class BitmapPerformanceBenchmark method iterationSpeed.

/**
 * Measures how long it takes to iterate over every document id in the bitmap
 * of a single dictionary value of {@code column}, and prints the number of
 * matches and the elapsed milliseconds to standard output.
 *
 * <p>The dictionary value looked up is hard-coded to {@code "na.us"}.
 *
 * @param indexSegmentDir path of the segment directory; must contain
 *     {@code <column>.bitmap.inv}
 * @param column name of the column whose inverted index is iterated
 * @throws Exception if the segment metadata, index buffer or segment
 *     directory cannot be loaded
 */
public static void iterationSpeed(String indexSegmentDir, String column) throws Exception {
    File indexSegment = new File(indexSegmentDir);
    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexSegment);
    File bitMapIndexFile = new File(indexSegmentDir, column + ".bitmap.inv");
    ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
    int cardinality = columnMetadata.getCardinality();
    PinotDataBuffer bitMapDataBuffer = PinotDataBuffer.fromFile(bitMapIndexFile, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
    // The segment directory and its reader are released by try-with-resources;
    // the mapped buffer is released in the finally block even when the benchmark throws.
    try (SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexSegment, segmentMetadata, ReadMode.mmap);
        SegmentDirectory.Reader segmentReader = segmentDirectory.createReader()) {
        BitmapInvertedIndexReader bitmapInvertedIndex = new BitmapInvertedIndexReader(bitMapDataBuffer, cardinality);
        ColumnIndexContainer container = ColumnIndexContainer.init(segmentReader, columnMetadata, null);
        ImmutableDictionaryReader dictionary = container.getDictionary();
        // System.out.println(column + ":\t" + MemoryUtil.deepMemoryUsageOf(bitmapInvertedIndex));
        int dictId = dictionary.indexOf("na.us");
        ImmutableRoaringBitmap immutable = bitmapInvertedIndex.getImmutable(dictId);
        Iterator<Integer> iterator = immutable.iterator();
        int count = 0;
        long start = System.currentTimeMillis();
        while (iterator.hasNext()) {
            iterator.next();
            count = count + 1;
        }
        long end = System.currentTimeMillis();
        System.out.println(" matched: " + count + " Time to iterate:" + (end - start));
    } finally {
        bitMapDataBuffer.close();
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) HashMap(java.util.HashMap) ImmutableDictionaryReader(com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader) BitmapInvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) ColumnIndexContainer(com.linkedin.pinot.core.segment.index.column.ColumnIndexContainer) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) ImmutableRoaringBitmap(org.roaringbitmap.buffer.ImmutableRoaringBitmap) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) File(java.io.File)

Example 3 with SegmentDirectory

use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.

the class BitmapPerformanceBenchmark method benchmarkIntersetionAndUnion.

/**
 * Benchmarks bitmap intersection ({@code and}) and union ({@code or}) over
 * randomly chosen dimension values of a segment.
 *
 * <p>Every {@code *.bitmap.inv} file in the segment directory is loaded as a
 * bitmap inverted index together with the dictionary of its column. For 1 to 3
 * dimensions (one random value per dimension) the method runs 100 rounds; each
 * round prints the selected values, the cardinality and serialized size of
 * each input bitmap, and then calls {@code and} and {@code or} on them.
 *
 * <p>Only dimensions that have a bitmap inverted index file take part, and the
 * number of dimensions per round is capped at the number of such dimensions.
 * The mapped index buffers and the segment directory stay open for the whole
 * benchmark and are released once it finishes.
 *
 * @param indexSegmentDir path of the segment directory
 * @throws IOException if the files of the segment directory cannot be listed
 * @throws Exception if segment metadata, indexes or dictionaries cannot be loaded
 */
public static void benchmarkIntersetionAndUnion(String indexSegmentDir) throws ConfigurationException, IOException, Exception {
    final String invertedIndexSuffix = ".bitmap.inv";
    File indexDir = new File(indexSegmentDir);
    File[] listFiles = indexDir.listFiles();
    if (listFiles == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IOException("Unable to list files in segment directory: " + indexSegmentDir);
    }
    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexDir);
    Map<String, BitmapInvertedIndexReader> bitMapIndexMap = new HashMap<String, BitmapInvertedIndexReader>();
    Map<String, Integer> cardinalityMap = new HashMap<String, Integer>();
    Map<String, ImmutableDictionaryReader> dictionaryMap = new HashMap<String, ImmutableDictionaryReader>();
    // Buffers back the inverted index readers used below, so they may only be closed after the benchmark.
    List<PinotDataBuffer> openBuffers = new ArrayList<PinotDataBuffer>();
    // One segment directory/reader is shared by all columns and closed when the benchmark is done.
    try (SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexDir, segmentMetadata, ReadMode.mmap);
        SegmentDirectory.Reader segmentReader = segmentDirectory.createReader()) {
        for (File file : listFiles) {
            String fileName = file.getName();
            if (!fileName.endsWith(invertedIndexSuffix)) {
                continue;
            }
            // Strip the literal suffix; a regex replace would treat the dots as wildcards.
            String column = fileName.substring(0, fileName.length() - invertedIndexSuffix.length());
            ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
            int columnCardinality = columnMetadata.getCardinality();
            cardinalityMap.put(column, columnCardinality);
            System.out.println(column + "\t\t\t" + columnCardinality + "  \t" + columnMetadata.getDataType());
            PinotDataBuffer bitmapDataBuffer = PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
            openBuffers.add(bitmapDataBuffer);
            BitmapInvertedIndexReader bitmapInvertedIndex = new BitmapInvertedIndexReader(bitmapDataBuffer, columnCardinality);
            ColumnIndexContainer container = ColumnIndexContainer.init(segmentReader, columnMetadata, null);
            ImmutableDictionaryReader dictionary = container.getDictionary();
            if (columnMetadata.getDataType() == DataType.INT) {
                System.out.println("BitmapPerformanceBenchmark.main()");
                assert dictionary instanceof IntDictionary;
            }
            dictionaryMap.put(column, dictionary);
            // System.out.println(column + ":\t" + MemoryUtil.deepMemoryUsageOf(bitmapInvertedIndex));
            bitMapIndexMap.put(column, bitmapInvertedIndex);
        }
        // Work on a private copy so shuffling does not touch the list returned by the schema,
        // and keep only the dimensions for which an inverted index was actually loaded.
        List<String> dimensionNamesList = new ArrayList<String>(segmentMetadata.getSchema().getDimensionNames());
        dimensionNamesList.retainAll(bitMapIndexMap.keySet());
        Collections.shuffle(dimensionNamesList);
        int NUM_TEST = 100;
        final int MAX_DIMENSIONS_PER_DIMENSION = 1;
        int MAX_DIMENSIONS_IN_WHERE_CLAUSE = Math.min(3, dimensionNamesList.size());
        Random random = new Random();
        for (int numDimensions = 1; numDimensions <= MAX_DIMENSIONS_IN_WHERE_CLAUSE; numDimensions++) {
            for (int numValuesPerDimension = 1; numValuesPerDimension <= MAX_DIMENSIONS_PER_DIMENSION; numValuesPerDimension++) {
                int runCount = 0;
                while (runCount < NUM_TEST) {
                    Collections.shuffle(dimensionNamesList);
                    List<ImmutableRoaringBitmap> bitMaps = new ArrayList<ImmutableRoaringBitmap>();
                    List<String> columnNameValuePairs = new ArrayList<String>();
                    for (int i = 0; i < numDimensions; i++) {
                        String columnName = dimensionNamesList.get(i);
                        InvertedIndexReader bitmapInvertedIndex = bitMapIndexMap.get(columnName);
                        for (int j = 0; j < numValuesPerDimension; j++) {
                            int dictId = random.nextInt(cardinalityMap.get(columnName));
                            String dictValue = dictionaryMap.get(columnName).getStringValue(dictId);
                            columnNameValuePairs.add(columnName + ":" + dictValue);
                            ImmutableRoaringBitmap immutable = bitmapInvertedIndex.getImmutable(dictId);
                            bitMaps.add(immutable);
                        }
                    }
                    System.out.println("START**********************************");
                    int[] cardinality = new int[bitMaps.size()];
                    int[] sizes = new int[bitMaps.size()];
                    for (int i = 0; i < bitMaps.size(); i++) {
                        ImmutableRoaringBitmap immutableRoaringBitmap = bitMaps.get(i);
                        cardinality[i] = immutableRoaringBitmap.getCardinality();
                        sizes[i] = immutableRoaringBitmap.getSizeInBytes();
                    }
                    System.out.println("\t#bitmaps:" + bitMaps.size());
                    System.out.println("\tinput values:" + columnNameValuePairs);
                    System.out.println("\tinput cardinality:" + Arrays.toString(cardinality));
                    System.out.println("\tinput sizes:" + Arrays.toString(sizes));
                    and(bitMaps);
                    or(bitMaps);
                    System.out.println("END**********************************");
                    runCount = runCount + 1;
                }
            }
        }
    } finally {
        for (PinotDataBuffer buffer : openBuffers) {
            buffer.close();
        }
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) HashMap(java.util.HashMap) ImmutableDictionaryReader(com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader) ArrayList(java.util.ArrayList) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) IntDictionary(com.linkedin.pinot.core.segment.index.readers.IntDictionary) Random(java.util.Random) ImmutableRoaringBitmap(org.roaringbitmap.buffer.ImmutableRoaringBitmap) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) BitmapInvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader) ColumnIndexContainer(com.linkedin.pinot.core.segment.index.column.ColumnIndexContainer) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) BitmapInvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader) InvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.InvertedIndexReader) File(java.io.File)

Example 4 with SegmentDirectory

use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.

the class LoadersTest method testPadding.

/**
 * Verifies string dictionary padding for three packaged segments: the old
 * format, the new format padded with '%', and the new format padded with the
 * null character.
 *
 * <p>For each segment the test checks the padding character recorded in the
 * metadata of column "name", the padded values returned by
 * {@code getStringValue}, the unpadded values returned by {@code get}, and
 * that {@code indexOf} finds a value regardless of trailing padding
 * characters. Each segment directory and reader is closed once its assertions
 * have run.
 */
@Test
public void testPadding() throws Exception {
    // Old Format
    TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_OLD))), INDEX_DIR);
    File segmentDirectory = new File(INDEX_DIR, "paddingOld");
    SegmentMetadataImpl originalMetadata = new SegmentMetadataImpl(segmentDirectory);
    Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
    ColumnMetadata columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
    try (SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
        SegmentDirectory.Reader reader = segmentDir.createReader()) {
        PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
        StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
        Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
        Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
        Assert.assertEquals(dict.get(0), "lynda 2.0");
        Assert.assertEquals(dict.get(1), "lynda");
        Assert.assertEquals(dict.indexOf("lynda%"), 1);
        Assert.assertEquals(dict.indexOf("lynda%%"), 1);
    }
    // New Format Padding character %
    TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_PERCENT))), INDEX_DIR);
    segmentDirectory = new File(INDEX_DIR, "paddingPercent");
    originalMetadata = new SegmentMetadataImpl(segmentDirectory);
    Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
    columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
    try (SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
        SegmentDirectory.Reader reader = segmentDir.createReader()) {
        PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
        StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
        Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
        Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
        Assert.assertEquals(dict.get(0), "lynda 2.0");
        Assert.assertEquals(dict.get(1), "lynda");
        Assert.assertEquals(dict.indexOf("lynda%"), 1);
        Assert.assertEquals(dict.indexOf("lynda%%"), 1);
    }
    // New Format Padding character Null
    TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_NULL))), INDEX_DIR);
    segmentDirectory = new File(INDEX_DIR, "paddingNull");
    originalMetadata = new SegmentMetadataImpl(segmentDirectory);
    Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
    columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
    try (SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
        SegmentDirectory.Reader reader = segmentDir.createReader()) {
        PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
        StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
        Assert.assertEquals(dict.getStringValue(0), "lynda\0\0\0\0");
        Assert.assertEquals(dict.getStringValue(1), "lynda 2.0");
        Assert.assertEquals(dict.get(0), "lynda");
        Assert.assertEquals(dict.get(1), "lynda 2.0");
        Assert.assertEquals(dict.indexOf("lynda\0"), 0);
        Assert.assertEquals(dict.indexOf("lynda\0\0"), 0);
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) File(java.io.File) StringDictionary(com.linkedin.pinot.core.segment.index.readers.StringDictionary) Test(org.testng.annotations.Test)

Example 5 with SegmentDirectory

use of com.linkedin.pinot.core.segment.store.SegmentDirectory in project pinot by linkedin.

the class SegmentPreProcessorTest method checkInvertedIndexCreation.

/**
 * Asserts which columns report an inverted index before and after running
 * {@code SegmentPreProcessor} on the given segment.
 *
 * <p>Before processing, the expected state depends on {@code reCreate}: when
 * it is {@code true}, columns 1, 13 and 7 must already have an inverted index;
 * otherwise only column 7 may have one. After processing, columns 1, 13 and 7
 * must all have one. The non-existent column must never have one.
 *
 * @param segmentDirectoryFile directory of the segment under test
 * @param segmentMetadata metadata used to open the segment directory
 * @param reCreate whether the inverted indexes are expected to exist already
 *     before the pre-processor runs
 * @throws Exception if the segment cannot be opened or processed
 */
private void checkInvertedIndexCreation(File segmentDirectoryFile, SegmentMetadataImpl segmentMetadata, boolean reCreate) throws Exception {
    // State before the pre-processor runs.
    try (SegmentDirectory directoryBefore = SegmentDirectory.createFromLocalFS(segmentDirectoryFile, segmentMetadata, ReadMode.mmap)) {
        try (SegmentDirectory.Reader readerBefore = directoryBefore.createReader()) {
            if (!reCreate) {
                Assert.assertFalse(readerBefore.hasIndexFor(COLUMN1_NAME, ColumnIndexType.INVERTED_INDEX));
                Assert.assertTrue(readerBefore.hasIndexFor(COLUMN7_NAME, ColumnIndexType.INVERTED_INDEX));
                Assert.assertFalse(readerBefore.hasIndexFor(COLUMN13_NAME, ColumnIndexType.INVERTED_INDEX));
                Assert.assertFalse(readerBefore.hasIndexFor(NO_SUCH_COLUMN_NAME, ColumnIndexType.INVERTED_INDEX));
            } else {
                Assert.assertTrue(readerBefore.hasIndexFor(COLUMN1_NAME, ColumnIndexType.INVERTED_INDEX));
                Assert.assertTrue(readerBefore.hasIndexFor(COLUMN13_NAME, ColumnIndexType.INVERTED_INDEX));
                Assert.assertTrue(readerBefore.hasIndexFor(COLUMN7_NAME, ColumnIndexType.INVERTED_INDEX));
                Assert.assertFalse(readerBefore.hasIndexFor(NO_SUCH_COLUMN_NAME, ColumnIndexType.INVERTED_INDEX));
            }
        }
    }

    SegmentPreProcessor preProcessor = new SegmentPreProcessor(segmentDirectoryFile, _indexLoadingConfigMetadata, null);
    preProcessor.process();

    // State after the pre-processor has run.
    try (SegmentDirectory directoryAfter = SegmentDirectory.createFromLocalFS(segmentDirectoryFile, segmentMetadata, ReadMode.mmap)) {
        try (SegmentDirectory.Reader readerAfter = directoryAfter.createReader()) {
            Assert.assertTrue(readerAfter.hasIndexFor(COLUMN1_NAME, ColumnIndexType.INVERTED_INDEX));
            Assert.assertTrue(readerAfter.hasIndexFor(COLUMN13_NAME, ColumnIndexType.INVERTED_INDEX));
            Assert.assertTrue(readerAfter.hasIndexFor(COLUMN7_NAME, ColumnIndexType.INVERTED_INDEX));
            Assert.assertFalse(readerAfter.hasIndexFor(NO_SUCH_COLUMN_NAME, ColumnIndexType.INVERTED_INDEX));
        }
    }
}
Also used : SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory)

Aggregations

SegmentDirectory (com.linkedin.pinot.core.segment.store.SegmentDirectory): 8; SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl): 7; ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata): 5; PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer): 5; File (java.io.File): 5; ColumnIndexContainer (com.linkedin.pinot.core.segment.index.column.ColumnIndexContainer): 2; BitmapInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader): 2; ImmutableDictionaryReader (com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader): 2; HashMap (java.util.HashMap): 2; ImmutableRoaringBitmap (org.roaringbitmap.buffer.ImmutableRoaringBitmap): 2; Test (org.testng.annotations.Test): 2; SingleColumnMultiValueReader (com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader): 1; SingleColumnSingleValueReader (com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader): 1; SingleColumnMultiValueWriter (com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter): 1; SingleColumnSingleValueWriter (com.linkedin.pinot.core.io.writer.SingleColumnSingleValueWriter): 1; SegmentV1V2ToV3FormatConverter (com.linkedin.pinot.core.segment.index.converter.SegmentV1V2ToV3FormatConverter): 1; IntDictionary (com.linkedin.pinot.core.segment.index.readers.IntDictionary): 1; InvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.InvertedIndexReader): 1; StringDictionary (com.linkedin.pinot.core.segment.index.readers.StringDictionary): 1; BufferedInputStream (java.io.BufferedInputStream): 1