Search in sources :

Example 46 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class FixedBitRowColDataWriterReaderTest method testSingleColUnsigned.

/**
 * For every bit width from 1 to 31, writes 100 random positive and negative values into a
 * single fixed-bit column with {@link FixedBitSingleValueMultiColWriter}, then checks that
 * {@link FixedBitSingleValueMultiColReader} returns the same values when the file is loaded in
 * heap mode and in mmap mode.
 *
 * <p>The temporary data file is removed after each bit width, even when an assertion fails.
 *
 * @throws Exception if writing, loading or reading the data file fails
 */
@Test
public void testSingleColUnsigned() throws Exception {
    final int rows = 100;
    final int cols = 1;
    final Random r = new Random();
    for (int maxBits = 1; maxBits < 32; maxBits++) {
        LOGGER.debug("START test maxBits:" + maxBits);
        final String fileName = getClass().getName() + "_single_col_fixed_bit_" + maxBits + ".dat";
        final File file = new File(fileName);
        // Remove any leftover from an earlier run before writing.
        file.delete();
        try {
            final int[] columnSizesInBits = new int[] { maxBits };
            final int[] data = new int[rows];
            // For maxBits == 31 the double 2^31 does not fit in an int; the narrowing cast
            // saturates to Integer.MAX_VALUE, which is still a valid bound for nextInt.
            final int maxValue = (int) Math.pow(2, maxBits);
            final FixedBitSingleValueMultiColWriter writer = new FixedBitSingleValueMultiColWriter(file, rows, cols, columnSizesInBits, new boolean[] { true });
            writer.open();
            try {
                for (int i = 0; i < rows; i++) {
                    data[i] = r.nextInt(maxValue) * ((Math.random() > .5) ? 1 : -1);
                    writer.setInt(i, 0, data[i]);
                }
            } finally {
                writer.close();
            }
            // Test heap mode
            assertSingleColReadsBack(file, ReadMode.heap, "testing", rows, cols, columnSizesInBits, data);
            // Test mmap mode
            assertSingleColReadsBack(file, ReadMode.mmap, "mmap-testing", rows, cols, columnSizesInBits, data);
        } finally {
            // Always clean up so a failing width does not leave files in the working directory.
            file.delete();
        }
    }
}

/**
 * Loads {@code file} read-only in the given read mode and asserts that column 0 of every row
 * equals the corresponding entry of {@code expected}. The reader and the buffer are closed
 * before returning, whether or not the assertions pass.
 *
 * @param file file previously produced by the writer
 * @param readMode read mode passed to {@code PinotDataBuffer.fromFile}
 * @param context context string passed to {@code PinotDataBuffer.fromFile}
 * @param rows number of rows to read
 * @param cols number of columns in the file
 * @param columnSizesInBits bit width of each column
 * @param expected values expected in column 0, indexed by row
 * @throws Exception if loading, reading or closing fails
 */
private void assertSingleColReadsBack(File file, ReadMode readMode, String context, int rows, int cols, int[] columnSizesInBits, int[] expected) throws Exception {
    PinotDataBuffer buffer = PinotDataBuffer.fromFile(file, readMode, FileChannel.MapMode.READ_ONLY, context);
    try {
        FixedBitSingleValueMultiColReader reader = new FixedBitSingleValueMultiColReader(buffer, rows, cols, columnSizesInBits, new boolean[] { true });
        try {
            for (int i = 0; i < rows; i++) {
                Assert.assertEquals(reader.getInt(i, 0), expected[i]);
            }
        } finally {
            reader.close();
        }
    } finally {
        buffer.close();
    }
}
Also used : Random(java.util.Random) FixedBitSingleValueMultiColWriter(com.linkedin.pinot.core.io.writer.impl.FixedBitSingleValueMultiColWriter) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) FixedBitSingleValueMultiColReader(com.linkedin.pinot.core.io.reader.impl.FixedBitSingleValueMultiColReader) File(java.io.File) Test(org.testng.annotations.Test)

Example 47 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class FixedBitSingleValueTest method testV2.

/**
 * For every bit width from 1 to 31, writes 1000 random non-negative values with
 * {@link FixedBitSingleValueWriter}, loads the file in heap mode and reads every value back
 * with {@link FixedBitSingleValueReader}. Both arrays are logged at trace level.
 *
 * <p>NOTE(review): the comparison of read values against written values is commented out, so
 * this test currently only checks that writing and reading complete without throwing.
 * Confirm whether the assertion can be re-enabled.
 *
 * @throws Exception if writing, loading or reading the data file fails
 */
@Test
public void testV2() throws Exception {
    int ROWS = 1000;
    Random random = new Random();
    for (int numBits = 1; numBits < 32; numBits++) {
        File file = new File(this.getClass().getName() + "_" + numBits + ".test");
        try {
            int[] data = new int[ROWS];
            // For numBits == 31 the narrowing cast saturates to Integer.MAX_VALUE.
            int max = (int) Math.pow(2, numBits);
            FixedBitSingleValueWriter writer = new FixedBitSingleValueWriter(file, ROWS, numBits);
            try {
                for (int i = 0; i < ROWS; i++) {
                    data[i] = random.nextInt(max);
                    writer.setInt(i, data[i]);
                }
            } finally {
                writer.close();
            }
            PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "testing");
            try {
                FixedBitSingleValueReader reader = new FixedBitSingleValueReader(heapBuffer, ROWS, numBits);
                try {
                    int[] read = new int[ROWS];
                    for (int i = 0; i < ROWS; i++) {
                        read[i] = reader.getInt(i);
                        //Assert.assertEquals(reader.getInt(i), data[i],
                        //  "Failed for bit:" + numBits + " Expected " + data[i] + " but found " + reader.getInt(i) + "  at " + i);
                    }
                    LOGGER.trace(Arrays.toString(data));
                    LOGGER.trace(Arrays.toString(read));
                } finally {
                    reader.close();
                }
            } finally {
                heapBuffer.close();
            }
        } finally {
            // Always clean up so a failing width does not leave files in the working directory.
            file.delete();
        }
    }
}
Also used : Random(java.util.Random) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) FixedBitSingleValueWriter(com.linkedin.pinot.core.io.writer.impl.v2.FixedBitSingleValueWriter) FixedBitSingleValueReader(com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader) File(java.io.File) Test(org.testng.annotations.Test)

Example 48 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class VarByteChunkSingleValueReaderWriteTest method test.

/**
   * This test writes {@link #NUM_STRINGS} random strings using {@link VarByteChunkSingleValueWriter}.
   * It then reads the strings back using {@link VarByteChunkSingleValueReader}, and asserts that
   * what was read is the same as what was written.
   *
   * Number of docs and docs per chunk are chosen to generate complete as well as partial chunks.
   *
   * The output file is deleted before the test starts and again when it ends, even if a write
   * or an assertion fails.
   *
   * @throws Exception if writing, loading or reading the output file fails
   */
@Test
public void test() throws Exception {
    String[] expected = new String[NUM_STRINGS];
    Random random = new Random();
    File outFile = new File(TEST_FILE);
    FileUtils.deleteQuietly(outFile);
    try {
        // The writer needs the longest encoded length up front, so generate all strings first.
        int maxStringLengthInBytes = 0;
        for (int i = 0; i < NUM_STRINGS; i++) {
            expected[i] = RandomStringUtils.random(random.nextInt(MAX_STRING_LENGTH));
            maxStringLengthInBytes = Math.max(maxStringLengthInBytes, expected[i].getBytes(UTF_8).length);
        }
        ChunkCompressor compressor = ChunkCompressorFactory.getCompressor("snappy");
        VarByteChunkSingleValueWriter writer = new VarByteChunkSingleValueWriter(outFile, compressor, NUM_STRINGS, NUM_DOCS_PER_CHUNK, maxStringLengthInBytes);
        try {
            for (int i = 0; i < NUM_STRINGS; i++) {
                writer.setString(i, expected[i]);
            }
        } finally {
            writer.close();
        }
        // NOTE(review): pinotDataBuffer is never closed explicitly; presumably reader.close()
        // releases it - confirm against VarByteChunkSingleValueReader.
        PinotDataBuffer pinotDataBuffer = PinotDataBuffer.fromFile(outFile, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, getClass().getName());
        ChunkDecompressor uncompressor = ChunkCompressorFactory.getDecompressor("snappy");
        VarByteChunkSingleValueReader reader = new VarByteChunkSingleValueReader(pinotDataBuffer, uncompressor);
        try {
            ChunkReaderContext context = reader.createContext();
            for (int i = 0; i < NUM_STRINGS; i++) {
                String actual = reader.getString(i, context);
                Assert.assertEquals(actual, expected[i]);
            }
        } finally {
            reader.close();
        }
    } finally {
        FileUtils.deleteQuietly(outFile);
    }
}
Also used : ChunkCompressor(com.linkedin.pinot.core.io.compression.ChunkCompressor) Random(java.util.Random) VarByteChunkSingleValueWriter(com.linkedin.pinot.core.io.writer.impl.v1.VarByteChunkSingleValueWriter) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) ChunkDecompressor(com.linkedin.pinot.core.io.compression.ChunkDecompressor) VarByteChunkSingleValueReader(com.linkedin.pinot.core.io.reader.impl.v1.VarByteChunkSingleValueReader) File(java.io.File) ChunkReaderContext(com.linkedin.pinot.core.io.reader.impl.ChunkReaderContext) Test(org.testng.annotations.Test)

Example 49 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class FixedByteSingleColumnMultiValueReaderWriter method close.

/**
 * Closes every buffer in {@code dataBuffers}, empties that collection, then closes the header
 * buffer and drops the reference to it.
 *
 * <p>Calling this method again after it has completed has no effect: the data buffer
 * collection is already empty and the header buffer reference is already {@code null}.
 */
@Override
public void close() {
    for (PinotDataBuffer dataBuffer : dataBuffers) {
        dataBuffer.close();
    }
    dataBuffers.clear();
    // headerBuffer is nulled below on the first close; without this guard a second close()
    // would throw a NullPointerException.
    if (headerBuffer != null) {
        headerBuffer.close();
        headerBuffer = null;
    }
}
Also used : PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer)

Example 50 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class SegmentFormatConverterV1ToV2 method convert.

/**
 * Rewrites the forward index of every unsorted column in the given segment directory from the
 * v1 fixed-bit readers' format into the v2 fixed-bit writers' format, then marks the segment
 * metadata as {@link SegmentVersion#v2}.
 *
 * <p>For each unsorted column the values are copied row by row into a {@code .tmp} file next
 * to the segment, the existing forward index is removed from the segment writer, and a new
 * index buffer is filled from the temporary file. The temporary file is deleted afterwards,
 * even if the conversion of that column fails. Sorted columns are left untouched.
 *
 * <p>Before the metadata file is rewritten, a copy of it is stored alongside with the suffix
 * {@code .orig}.
 *
 * <p>NOTE(review): neither {@code segmentWriter} nor {@code segmentDirectory} is closed or
 * saved here - confirm whether the caller or the writer itself takes care of that.
 *
 * @param indexSegmentDir directory containing the segment to convert
 * @throws Exception if reading the v1 index, writing the v2 index or updating the metadata fails
 */
@Override
public void convert(File indexSegmentDir) throws Exception {
    SegmentMetadataImpl segmentMetadataImpl = new SegmentMetadataImpl(indexSegmentDir);
    SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexSegmentDir, segmentMetadataImpl, ReadMode.mmap);
    Set<String> columns = segmentMetadataImpl.getAllColumns();
    SegmentDirectory.Writer segmentWriter = segmentDirectory.createWriter();
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadataImpl.getColumnMetadataFor(column);
        if (columnMetadata.isSorted()) {
            // no need to change sorted forward index
            continue;
        }
        PinotDataBuffer fwdIndexBuffer = segmentWriter.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
        boolean singleValue = columnMetadata.isSingleValue();
        String tmpFileExtension = singleValue ? V1Constants.Indexes.UN_SORTED_SV_FWD_IDX_FILE_EXTENTION : V1Constants.Indexes.UN_SORTED_MV_FWD_IDX_FILE_EXTENTION;
        File convertedFwdIndexFile = new File(indexSegmentDir, column + tmpFileExtension + ".tmp");
        try {
            if (singleValue) {
                convertSingleValueForwardIndex(fwdIndexBuffer, segmentMetadataImpl, columnMetadata, convertedFwdIndexFile);
            } else {
                convertMultiValueForwardIndex(fwdIndexBuffer, segmentMetadataImpl, columnMetadata, convertedFwdIndexFile);
            }
            replaceForwardIndex(segmentWriter, column, convertedFwdIndexFile);
        } finally {
            // The temporary file is only a staging area; never leave it behind in the segment.
            convertedFwdIndexFile.delete();
        }
    }
    markMetadataAsV2(indexSegmentDir);
}

/**
 * Copies every row of a single-value forward index from the v1 reader into a v2 writer that
 * targets {@code convertedFwdIndexFile}. Reader and writer are closed before returning.
 */
private static void convertSingleValueForwardIndex(PinotDataBuffer fwdIndexBuffer, SegmentMetadataImpl segmentMetadataImpl, ColumnMetadata columnMetadata, File convertedFwdIndexFile) throws Exception {
    // since we use dictionary to encode values, we wont have any negative values in forward
    // index
    boolean signed = false;
    SingleColumnSingleValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(fwdIndexBuffer, segmentMetadataImpl.getTotalDocs(), columnMetadata.getBitsPerElement(), signed);
    try {
        SingleColumnSingleValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitSingleValueWriter(convertedFwdIndexFile, segmentMetadataImpl.getTotalDocs(), columnMetadata.getBitsPerElement());
        try {
            for (int row = 0; row < segmentMetadataImpl.getTotalDocs(); row++) {
                v2Writer.setInt(row, v1Reader.getInt(row));
            }
        } finally {
            v2Writer.close();
        }
    } finally {
        v1Reader.close();
    }
}

/**
 * Copies every row of a multi-value forward index from the v1 reader into a v2 writer that
 * targets {@code convertedFwdIndexFile}. Reader and writer are closed before returning.
 */
private static void convertMultiValueForwardIndex(PinotDataBuffer fwdIndexBuffer, SegmentMetadataImpl segmentMetadataImpl, ColumnMetadata columnMetadata, File convertedFwdIndexFile) throws Exception {
    // since we use dictionary to encode values, we wont have any negative values in forward
    // index
    boolean signed = false;
    SingleColumnMultiValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(fwdIndexBuffer, segmentMetadataImpl.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement(), signed);
    try {
        SingleColumnMultiValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitMultiValueWriter(convertedFwdIndexFile, segmentMetadataImpl.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement());
        try {
            // Scratch buffer sized for the largest row; each row is trimmed to its own length.
            int[] values = new int[columnMetadata.getMaxNumberOfMultiValues()];
            for (int row = 0; row < segmentMetadataImpl.getTotalDocs(); row++) {
                int length = v1Reader.getIntArray(row, values);
                int[] copy = new int[length];
                System.arraycopy(values, 0, copy, 0, length);
                v2Writer.setIntArray(row, copy);
            }
        } finally {
            v2Writer.close();
        }
    } finally {
        v1Reader.close();
    }
}

/**
 * Removes the column's current forward index from the segment writer and fills a newly
 * allocated index buffer with the contents of {@code convertedFwdIndexFile}.
 */
private static void replaceForwardIndex(SegmentDirectory.Writer segmentWriter, String column, File convertedFwdIndexFile) throws Exception {
    segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
    // FIXME (carried over from the original code; presumably about the int cast below, which
    // truncates the size of a converted file larger than Integer.MAX_VALUE bytes - confirm)
    PinotDataBuffer newIndexBuffer = segmentWriter.newIndexFor(column, ColumnIndexType.FORWARD_INDEX, (int) convertedFwdIndexFile.length());
    newIndexBuffer.readFrom(convertedFwdIndexFile);
}

/**
 * Copies the segment metadata file to a sibling with the suffix {@code .orig}, then rewrites
 * the metadata file from that copy with the segment version property set to v2.
 */
private static void markMetadataAsV2(File indexSegmentDir) throws Exception {
    File metadataFile = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME);
    File metadataFileCopy = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME + ".orig");
    BufferedInputStream metadataIn = new BufferedInputStream(new FileInputStream(metadataFile));
    try {
        BufferedOutputStream metadataOut = new BufferedOutputStream(new FileOutputStream(metadataFileCopy));
        try {
            IOUtils.copy(metadataIn, metadataOut);
        } finally {
            metadataOut.close();
        }
    } finally {
        metadataIn.close();
    }
    final PropertiesConfiguration properties = new PropertiesConfiguration(metadataFileCopy);
    // update the segment version
    properties.setProperty(V1Constants.MetadataKeys.Segment.SEGMENT_VERSION, SegmentVersion.v2.toString());
    metadataFile.delete();
    properties.save(metadataFile);
}
Also used : SingleColumnMultiValueWriter(com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) SingleColumnSingleValueWriter(com.linkedin.pinot.core.io.writer.SingleColumnSingleValueWriter) BufferedInputStream(java.io.BufferedInputStream) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader) BufferedOutputStream(java.io.BufferedOutputStream) SingleColumnSingleValueReader(com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader) FileInputStream(java.io.FileInputStream) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Aggregations

PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer)56 File (java.io.File)29 Test (org.testng.annotations.Test)27 Random (java.util.Random)16 FixedByteSingleValueMultiColReader (com.linkedin.pinot.core.io.reader.impl.FixedByteSingleValueMultiColReader)11 FixedByteSingleValueMultiColWriter (com.linkedin.pinot.core.io.writer.impl.FixedByteSingleValueMultiColWriter)8 ChunkDecompressor (com.linkedin.pinot.core.io.compression.ChunkDecompressor)6 ChunkReaderContext (com.linkedin.pinot.core.io.reader.impl.ChunkReaderContext)6 RandomAccessFile (java.io.RandomAccessFile)6 ChunkCompressor (com.linkedin.pinot.core.io.compression.ChunkCompressor)5 FixedByteChunkSingleValueReader (com.linkedin.pinot.core.io.reader.impl.v1.FixedByteChunkSingleValueReader)5 ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata)5 SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl)5 BitmapInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader)5 SegmentDirectory (com.linkedin.pinot.core.segment.store.SegmentDirectory)5 FileOutputStream (java.io.FileOutputStream)5 FixedByteChunkSingleValueWriter (com.linkedin.pinot.core.io.writer.impl.v1.FixedByteChunkSingleValueWriter)4 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)4 FixedBitSingleValueMultiColReader (com.linkedin.pinot.core.io.reader.impl.FixedBitSingleValueMultiColReader)3 SingleColumnMultiValueWriter (com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter)3