Search in sources :

Example 1 with SingleColumnMultiValueReader

use of com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader in project pinot by linkedin.

the class InvertedIndexHandler method createInvertedIndexForColumn.

/**
 * Creates the bitmap inverted index for a single column by replaying its forward index.
 *
 * <p>A {@code <column>.inv.inprogress} marker file in {@code indexDir} brackets the work: it is created
 * before the index is built and deleted once the index is complete. When the marker is already present on
 * entry, the previous run is treated as interrupted and any existing inverted index file is deleted before
 * rebuilding. When there is no marker and the segment already reports an inverted index for the column,
 * the method returns without doing anything.
 *
 * @param columnMetadata metadata of the column to index
 * @throws IOException if the marker file cannot be created or reading/writing an index fails
 */
private void createInvertedIndexForColumn(ColumnMetadata columnMetadata) throws IOException {
    final String column = columnMetadata.getColumnName();
    final File markerFile = new File(indexDir, column + ".inv.inprogress");
    final File invIndexFile = new File(indexDir, column + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);

    if (markerFile.exists()) {
        // A leftover marker means the last run was interrupted: drop whatever it left behind.
        // For v1 and v2 this file is the actual inverted index; for v3 it is the temporary one.
        FileUtils.deleteQuietly(invIndexFile);
    } else {
        if (segmentWriter.hasIndexFor(column, ColumnIndexType.INVERTED_INDEX)) {
            // Nothing to do, the inverted index is already there.
            LOGGER.info("Found inverted index for segment: {}, column: {}", segmentName, column);
            return;
        }
        // Leave a marker so an interrupted build can be detected on the next run.
        FileUtils.touch(markerFile);
    }

    LOGGER.info("Creating new inverted index for segment: {}, column: {}", segmentName, column);
    final int numDocs = columnMetadata.getTotalDocs();
    OffHeapBitmapInvertedIndexCreator creator = new OffHeapBitmapInvertedIndexCreator(indexDir, columnMetadata.getCardinality(), numDocs, columnMetadata.getTotalNumberOfEntries(), columnMetadata.getFieldSpec());

    try (DataFileReader fwdIndex = getForwardIndexReader(columnMetadata, segmentWriter)) {
        if (!columnMetadata.isSingleValue()) {
            // Multi-value column: feed every document's dictionary ids through a reusable buffer.
            SingleColumnMultiValueReader mvReader = (SingleColumnMultiValueReader) fwdIndex;
            int[] dictIdBuffer = new int[columnMetadata.getMaxNumberOfMultiValues()];
            int docId = 0;
            while (docId < numDocs) {
                int numValues = mvReader.getIntArray(docId, dictIdBuffer);
                creator.add(docId, dictIdBuffer, numValues);
                docId++;
            }
        } else {
            // Single-value column: one dictionary id per document.
            FixedBitSingleValueReader svReader = (FixedBitSingleValueReader) fwdIndex;
            int docId = 0;
            while (docId < numDocs) {
                creator.add(docId, svReader.getInt(docId));
                docId++;
            }
        }
    }
    creator.seal();

    // For v3, write the generated inverted index file into the single file and remove it.
    if (segmentVersion == SegmentVersion.v3) {
        LoaderUtils.writeIndexToV3Format(segmentWriter, column, invIndexFile, ColumnIndexType.INVERTED_INDEX);
    }

    // The build finished, so the marker is no longer needed.
    FileUtils.deleteQuietly(markerFile);
    LOGGER.info("Created inverted index for segment: {}, column: {}", segmentName, column);
}
Also used : DataFileReader(com.linkedin.pinot.core.io.reader.DataFileReader) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) FixedBitSingleValueReader(com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader) File(java.io.File)

Example 2 with SingleColumnMultiValueReader

use of com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader in project pinot by linkedin.

the class SegmentFormatConverterV1ToV2 method convert.

/**
 * Converts the forward indexes of the segment in {@code indexSegmentDir} from the v1 to the v2 format.
 *
 * <p>For every column that is not sorted, the forward index is read with the v1 fixed-bit reader,
 * rewritten with the v2 fixed-bit writer into a temporary {@code .tmp} file, and then swapped into the
 * segment through the segment writer. Sorted columns are skipped. Afterwards the metadata file is copied
 * to a {@code .orig} sibling and rewritten with the segment version property set to v2.
 *
 * @param indexSegmentDir directory of the segment to convert
 * @throws Exception if reading or rewriting an index, or copying/saving the metadata, fails
 */
@Override
public void convert(File indexSegmentDir) throws Exception {
    SegmentMetadataImpl segmentMetadataImpl = new SegmentMetadataImpl(indexSegmentDir);
    SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexSegmentDir, segmentMetadataImpl, ReadMode.mmap);
    Set<String> columns = segmentMetadataImpl.getAllColumns();
    SegmentDirectory.Writer segmentWriter = segmentDirectory.createWriter();
    // since we use dictionary to encode values, we wont have any negative values in forward index
    final boolean signed = false;
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadataImpl.getColumnMetadataFor(column);
        if (columnMetadata.isSorted()) {
            // no need to change sorted forward index
            continue;
        }
        PinotDataBuffer fwdIndexBuffer = segmentWriter.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
        File convertedFwdIndexFile;
        if (columnMetadata.isSingleValue()) {
            convertedFwdIndexFile = new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_SV_FWD_IDX_FILE_EXTENTION + ".tmp");
            SingleColumnSingleValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(fwdIndexBuffer, segmentMetadataImpl.getTotalDocs(), columnMetadata.getBitsPerElement(), signed);
            // Close reader and writer even when the copy loop fails, so neither is leaked.
            try {
                SingleColumnSingleValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitSingleValueWriter(convertedFwdIndexFile, segmentMetadataImpl.getTotalDocs(), columnMetadata.getBitsPerElement());
                try {
                    for (int row = 0; row < segmentMetadataImpl.getTotalDocs(); row++) {
                        v2Writer.setInt(row, v1Reader.getInt(row));
                    }
                } finally {
                    v2Writer.close();
                }
            } finally {
                v1Reader.close();
            }
        } else {
            convertedFwdIndexFile = new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_MV_FWD_IDX_FILE_EXTENTION + ".tmp");
            SingleColumnMultiValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(fwdIndexBuffer, segmentMetadataImpl.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement(), signed);
            // Close reader and writer even when the copy loop fails, so neither is leaked.
            try {
                SingleColumnMultiValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitMultiValueWriter(convertedFwdIndexFile, segmentMetadataImpl.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement());
                try {
                    // Reusable read buffer; each row is trimmed to its real length before writing.
                    int[] values = new int[columnMetadata.getMaxNumberOfMultiValues()];
                    for (int row = 0; row < segmentMetadataImpl.getTotalDocs(); row++) {
                        int length = v1Reader.getIntArray(row, values);
                        int[] copy = new int[length];
                        System.arraycopy(values, 0, copy, 0, length);
                        v2Writer.setIntArray(row, copy);
                    }
                } finally {
                    v2Writer.close();
                }
            } finally {
                v1Reader.close();
            }
        }
        // Swap the converted file in for the old forward index, then drop the temporary file.
        segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
        // FIXME (carried over from the original): the int cast truncates lengths above Integer.MAX_VALUE.
        // NOTE(review): presumably that is what the FIXME referred to - confirm.
        PinotDataBuffer newIndexBuffer = segmentWriter.newIndexFor(column, ColumnIndexType.FORWARD_INDEX, (int) convertedFwdIndexFile.length());
        newIndexBuffer.readFrom(convertedFwdIndexFile);
        convertedFwdIndexFile.delete();
    }
    File metadataFile = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME);
    File metadataFileCopy = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME + ".orig");
    // Back up the metadata; try-with-resources closes both streams even if the copy fails.
    try (BufferedInputStream metadataIn = new BufferedInputStream(new FileInputStream(metadataFile));
            BufferedOutputStream metadataOut = new BufferedOutputStream(new FileOutputStream(metadataFileCopy))) {
        IOUtils.copy(metadataIn, metadataOut);
    }
    final PropertiesConfiguration properties = new PropertiesConfiguration(metadataFileCopy);
    // update the segment version
    properties.setProperty(V1Constants.MetadataKeys.Segment.SEGMENT_VERSION, SegmentVersion.v2.toString());
    metadataFile.delete();
    properties.save(metadataFile);
}
Also used : SingleColumnMultiValueWriter(com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) SingleColumnSingleValueWriter(com.linkedin.pinot.core.io.writer.SingleColumnSingleValueWriter) BufferedInputStream(java.io.BufferedInputStream) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader) BufferedOutputStream(java.io.BufferedOutputStream) SingleColumnSingleValueReader(com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader) FileInputStream(java.io.FileInputStream) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Example 3 with SingleColumnMultiValueReader

use of com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader in project pinot by linkedin.

the class ColumnDataSourceImpl method getNextBlock.

/**
 * Creates the block for {@code blockId}, choosing the block implementation from the column metadata:
 * a multi-value block for multi-value columns, a sorted single-value block when the column has a
 * dictionary and is sorted, and an unsorted single-value block otherwise.
 *
 * @param blockId id handed to the created block
 * @return the newly created block
 */
@Override
public Block getNextBlock(BlockId blockId) {
    final ColumnMetadata metadata = indexContainer.getColumnMetadata();

    if (!metadata.isSingleValue()) {
        return new MultiValueBlock(blockId, (SingleColumnMultiValueReader) indexContainer.getForwardIndex(), indexContainer.getDictionary(), metadata);
    }

    // TODO: Support sorted index without dictionary.
    final boolean sortedWithDictionary = metadata.hasDictionary() && metadata.isSorted();
    if (sortedWithDictionary) {
        return new SortedSingleValueBlock(blockId, (SortedForwardIndexReader) indexContainer.getForwardIndex(), indexContainer.getDictionary(), metadata);
    }
    return new UnSortedSingleValueBlock(blockId, (SingleColumnSingleValueReader) indexContainer.getForwardIndex(), indexContainer.getDictionary(), metadata);
}
Also used : SingleColumnSingleValueReader(com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SortedForwardIndexReader(com.linkedin.pinot.core.io.reader.impl.SortedForwardIndexReader) Block(com.linkedin.pinot.core.common.Block) SortedSingleValueBlock(com.linkedin.pinot.core.operator.blocks.SortedSingleValueBlock) UnSortedSingleValueBlock(com.linkedin.pinot.core.operator.blocks.UnSortedSingleValueBlock) MultiValueBlock(com.linkedin.pinot.core.operator.blocks.MultiValueBlock) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader)

Example 4 with SingleColumnMultiValueReader

use of com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader in project pinot by linkedin.

the class FixedBitMultiValueTest method testSingleColMultiValueWithContext.

/**
 * Round-trips random multi-value data through the given writer and reader classes for every bit width
 * from 1 to 31.
 *
 * <p>For each bit width, 10 documents with 1 to 100 random values each are written to a data file with
 * the writer, then read back twice: once from a heap-loaded buffer using a reader context, and once from a
 * memory-mapped buffer without a context. Both reads must return exactly the values that were written.
 * The data file is deleted after each bit width, including when an assertion fails.
 *
 * @param writerClazz writer class; must expose a {@code (File, int, int, int)} constructor
 * @param readerClazz reader class; must expose a {@code (PinotDataBuffer, int, int, int, boolean)}
 *        constructor
 * @throws Exception if reflection, file access or reading/writing the data fails
 */
public void testSingleColMultiValueWithContext(Class<? extends SingleColumnMultiValueWriter> writerClazz, Class<? extends SingleColumnMultiValueReader<? extends ReaderContext>> readerClazz) throws Exception {
    LOGGER.info("Testing for writerClazz:{} readerClass:{}", writerClazz.getName(), readerClazz.getName());
    Constructor<? extends SingleColumnMultiValueWriter> writerClazzConstructor = writerClazz.getConstructor(File.class, int.class, int.class, int.class);
    Constructor<? extends SingleColumnMultiValueReader<? extends ReaderContext>> readerClazzConstructor = readerClazz.getConstructor(PinotDataBuffer.class, int.class, int.class, int.class, boolean.class);
    final int numDocs = 10;
    final int maxNumValues = 100;
    for (int maxBits = 1; maxBits < 32; maxBits++) {
        final String fileName = getClass().getName() + "_test_single_col_mv_fixed_bit.dat";
        final File f = new File(fileName);
        f.delete();
        try {
            final int[][] data = new int[numDocs][];
            final Random r = new Random();
            // For maxBits == 31 the double-to-int cast saturates at Integer.MAX_VALUE, which keeps the
            // bound positive for Random.nextInt.
            final int maxValue = (int) Math.pow(2, maxBits);
            int totalNumValues = 0;
            for (int i = 0; i < data.length; i++) {
                final int numValues = r.nextInt(maxNumValues) + 1;
                data[i] = new int[numValues];
                for (int j = 0; j < numValues; j++) {
                    data[i][j] = r.nextInt(maxValue);
                }
                totalNumValues += numValues;
            }
            SingleColumnMultiValueWriter writer = writerClazzConstructor.newInstance(f, numDocs, totalNumValues, maxBits);
            for (int i = 0; i < data.length; i++) {
                writer.setIntArray(i, data[i]);
            }
            writer.close();
            final int[] readValues = new int[maxNumValues];
            // Test heap mode
            PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(f, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "testing");
            // Raw type on purpose: the context-taking overload cannot be called through a wildcard type.
            SingleColumnMultiValueReader heapReader = readerClazzConstructor.newInstance(heapBuffer, numDocs, totalNumValues, maxBits, false);
            ReaderContext context = heapReader.createContext();
            for (int i = 0; i < data.length; i++) {
                final int numValues = heapReader.getIntArray(i, readValues, context);
                if (numValues != data[i].length) {
                    System.err.println("Failed Expected:" + data[i].length + " Actual:" + numValues);
                }
                Assert.assertEquals(numValues, data[i].length);
                for (int j = 0; j < numValues; j++) {
                    Assert.assertEquals(readValues[j], data[i][j]);
                }
            }
            heapReader.close();
            heapBuffer.close();
            // Test mmap mode
            PinotDataBuffer mmapBuffer = PinotDataBuffer.fromFile(f, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
            SingleColumnMultiValueReader<? extends ReaderContext> mmapReader = readerClazzConstructor.newInstance(mmapBuffer, numDocs, totalNumValues, maxBits, false);
            for (int i = 0; i < data.length; i++) {
                final int numValues = mmapReader.getIntArray(i, readValues);
                Assert.assertEquals(numValues, data[i].length);
                for (int j = 0; j < numValues; j++) {
                    Assert.assertEquals(readValues[j], data[i][j]);
                }
            }
            mmapReader.close();
            mmapBuffer.close();
        } finally {
            // Best-effort cleanup so a failed assertion does not leave the data file behind.
            f.delete();
        }
    }
    LOGGER.info("DONE: Testing for writerClazz:{} readerClass:{}", writerClazz.getName(), readerClazz.getName());
}
Also used : SingleColumnMultiValueWriter(com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter) Random(java.util.Random) RandomAccessFile(java.io.RandomAccessFile) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) ReaderContext(com.linkedin.pinot.core.io.reader.ReaderContext) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File)

Example 5 with SingleColumnMultiValueReader

use of com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader in project pinot by linkedin.

the class IntArraysTest method test1.

/**
 * Loads the first segment under {@code INDEX_DIR} once in heap mode and once in mmap mode and checks that
 * the forward index readers of both return the same data for every column and document.
 *
 * <p>Single-value columns are compared value by value. Multi-value columns are compared on the number of
 * values per document and on each of those values.
 *
 * @throws Exception if loading the segment or reading a forward index fails
 */
@Test
public void test1() throws Exception {
    final IndexSegmentImpl heapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR.listFiles()[0], ReadMode.heap);
    final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR.listFiles()[0], ReadMode.mmap);
    final Map<String, ColumnMetadata> metadataMap = ((SegmentMetadataImpl) heapSegment.getSegmentMetadata()).getColumnMetadataMap();
    for (final Map.Entry<String, ColumnMetadata> entry : metadataMap.entrySet()) {
        final String column = entry.getKey();
        final ColumnMetadata columnMetadata = entry.getValue();
        final DataFileReader heapArray = heapSegment.getForwardIndexReaderFor(column);
        final DataFileReader mmapArray = mmapSegment.getForwardIndexReaderFor(column);
        if (columnMetadata.isSingleValue()) {
            final SingleColumnSingleValueReader svHeapReader = (SingleColumnSingleValueReader) heapArray;
            final SingleColumnSingleValueReader svMmapReader = (SingleColumnSingleValueReader) mmapArray;
            for (int i = 0; i < columnMetadata.getTotalDocs(); i++) {
                Assert.assertEquals(svMmapReader.getInt(i), svHeapReader.getInt(i));
            }
        } else {
            final SingleColumnMultiValueReader mvHeapReader = (SingleColumnMultiValueReader) heapArray;
            final SingleColumnMultiValueReader mvMmapReader = (SingleColumnMultiValueReader) mmapArray;
            // Size the reusable buffers from the column metadata instead of a fixed guess.
            final int maxNumValues = columnMetadata.getMaxNumberOfMultiValues();
            final int[] heapValues = new int[maxNumValues];
            final int[] mmapValues = new int[maxNumValues];
            for (int i = 0; i < columnMetadata.getTotalDocs(); i++) {
                final int mmapLength = mvMmapReader.getIntArray(i, mmapValues);
                final int heapLength = mvHeapReader.getIntArray(i, heapValues);
                Assert.assertEquals(mmapLength, heapLength);
                // Equal lengths alone do not prove equal content, so compare the values as well.
                for (int j = 0; j < heapLength; j++) {
                    Assert.assertEquals(mmapValues[j], heapValues[j]);
                }
            }
        }
    }
}
Also used : SingleColumnSingleValueReader(com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) DataFileReader(com.linkedin.pinot.core.io.reader.DataFileReader) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader) Test(org.testng.annotations.Test)

Aggregations

SingleColumnMultiValueReader (com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader)5 SingleColumnSingleValueReader (com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader)3 ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata)3 File (java.io.File)3 DataFileReader (com.linkedin.pinot.core.io.reader.DataFileReader)2 SingleColumnMultiValueWriter (com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter)2 SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl)2 PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer)2 Block (com.linkedin.pinot.core.common.Block)1 ReaderContext (com.linkedin.pinot.core.io.reader.ReaderContext)1 SortedForwardIndexReader (com.linkedin.pinot.core.io.reader.impl.SortedForwardIndexReader)1 FixedBitSingleValueReader (com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader)1 SingleColumnSingleValueWriter (com.linkedin.pinot.core.io.writer.SingleColumnSingleValueWriter)1 MultiValueBlock (com.linkedin.pinot.core.operator.blocks.MultiValueBlock)1 SortedSingleValueBlock (com.linkedin.pinot.core.operator.blocks.SortedSingleValueBlock)1 UnSortedSingleValueBlock (com.linkedin.pinot.core.operator.blocks.UnSortedSingleValueBlock)1 OffHeapBitmapInvertedIndexCreator (com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator)1 IndexSegmentImpl (com.linkedin.pinot.core.segment.index.IndexSegmentImpl)1 SegmentDirectory (com.linkedin.pinot.core.segment.store.SegmentDirectory)1 BufferedInputStream (java.io.BufferedInputStream)1