
Example 51 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class LoaderUtils method writeIndexToV3Format.

/**
   * Write an index file to v3 format single index file and remove the old one.
   *
   * @param segmentWriter v3 format segment writer.
   * @param column column name.
   * @param indexFile index file to write from.
   * @param indexType index type.
   * @throws IOException if reading the index file or writing it into the v3 segment fails.
   */
public static void writeIndexToV3Format(SegmentDirectory.Writer segmentWriter, String column, File indexFile, ColumnIndexType indexType) throws IOException {
    int fileLength = (int) indexFile.length();
    PinotDataBuffer buffer = null;
    try {
        if (segmentWriter.hasIndexFor(column, indexType)) {
            // Index already exists, try to reuse it.
            buffer = segmentWriter.getIndexFor(column, indexType);
            if (buffer.size() != fileLength) {
                // Throw exception to drop and re-download the segment.
                throw new V3RemoveIndexException("V3 format segment already has " + indexType + " for column: " + column + " that cannot be reused.");
            }
        } else {
            // Index does not exist, create a new buffer for that.
            buffer = segmentWriter.newIndexFor(column, indexType, fileLength);
        }
        buffer.readFrom(indexFile);
    } finally {
        FileUtils.deleteQuietly(indexFile);
        if (buffer != null) {
            buffer.close();
        }
    }
}
Also used : PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer)
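
A minimal sketch of a caller, assuming a SegmentDirectory.Writer is already open for the target segment; the column name and index file below are hypothetical:

static void addDictionaryToV3Segment(SegmentDirectory.Writer writer, File segmentDir) throws IOException {
    // Hypothetical source file; writeIndexToV3Format deletes it in its finally
    // block, so the caller does not need to clean it up.
    File dictFile = new File(segmentDir, "myColumn.dict");
    LoaderUtils.writeIndexToV3Format(writer, "myColumn", dictFile, ColumnIndexType.DICTIONARY);
}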

Example 52 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class ColumnIndexContainer method loadSorted.

private static ColumnIndexContainer loadSorted(String column, SegmentDirectory.Reader segmentReader, ColumnMetadata metadata, ImmutableDictionaryReader dictionary) throws IOException {
    PinotDataBuffer dataBuffer = segmentReader.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
    FixedByteSingleValueMultiColReader indexReader = new FixedByteSingleValueMultiColReader(dataBuffer, metadata.getCardinality(), 2, new int[] { 4, 4 });
    return new SortedSVColumnIndexContainer(column, metadata, indexReader, dictionary);
}
Also used : FixedByteSingleValueMultiColReader(com.linkedin.pinot.core.io.reader.impl.FixedByteSingleValueMultiColReader) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer)
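
For a sorted column the forward index is just one (startDocId, endDocId) pair per dictionary id, which is why the reader above is sized with metadata.getCardinality() rows and two 4-byte int columns. A minimal sketch of reading one pair, assuming the reader's getInt(row, column) accessor:

// Row = dictionary id; column 0 = first docId, column 1 = last docId of the
// contiguous run of documents holding that value in the sorted column.
int dictId = 0; // hypothetical dictionary id
int startDocId = indexReader.getInt(dictId, 0);
int endDocId = indexReader.getInt(dictId, 1);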

Example 53 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class ForwardIndexReaderBenchmark method multiValuedReadBenchMarkV1.

public static void multiValuedReadBenchMarkV1(File file, int numDocs, int totalNumValues, int maxEntriesPerDoc, int columnSizeInBits) throws Exception {
    System.out.println("******************************************************************");
    System.out.println("Analyzing " + file.getName() + " numDocs:" + numDocs + ", totalNumValues:" + totalNumValues + ", maxEntriesPerDoc:" + maxEntriesPerDoc + ", numBits:" + columnSizeInBits);
    long start, end;
    boolean readFile = true;
    boolean randomRead = true;
    boolean contextualRead = true;
    boolean signed = false;
    PinotDataBuffer dataBuffer = PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "benchmarking");
    BaseSingleColumnMultiValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(dataBuffer, numDocs, totalNumValues, columnSizeInBits, signed);
    int[] intArray = new int[maxEntriesPerDoc];
    File outfile = new File("/tmp/" + file.getName() + ".raw");
    FileWriter fw = new FileWriter(outfile);
    for (int i = 0; i < numDocs; i++) {
        int length = reader.getIntArray(i, intArray);
        StringBuilder sb = new StringBuilder();
        String delim = "";
        for (int j = 0; j < length; j++) {
            sb.append(delim);
            sb.append(intArray[j]);
            delim = ",";
        }
        fw.write(sb.toString());
        fw.write("\n");
    }
    fw.close();
    // Full sequential scan over the raw file bytes (no reader involved).
    if (readFile) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
        raf.getChannel().read(buffer);
        for (int run = 0; run < MAX_RUNS; run++) {
            long length = file.length();
            start = System.currentTimeMillis();
            for (int i = 0; i < length; i++) {
                byte b = buffer.get(i);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println("v1 multi value read bytes stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
        raf.close();
    }
    if (randomRead) {
        // Despite the flag name, this pass reads each docId sequentially, without a reader context.
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int length = reader.getIntArray(i, intArray);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println("v1 multi value sequential read one stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    if (contextualRead) {
        // Same sequential scan, but reusing a MultiValueReaderContext across reads.
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (int run = 0; run < MAX_RUNS; run++) {
            MultiValueReaderContext context = (MultiValueReaderContext) reader.createContext();
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int length = reader.getIntArray(i, intArray, context);
            }
            end = System.currentTimeMillis();
            // System.out.println("RUN:" + run + "Time:" + (end-start));
            stats.addValue((end - start));
        }
        System.out.println("v1 multi value sequential read one with context stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    reader.close();
    dataBuffer.close();
    System.out.println("******************************************************************");
}
Also used : MultiValueReaderContext(com.linkedin.pinot.core.io.reader.impl.v1.MultiValueReaderContext) DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) FileWriter(java.io.FileWriter) BaseSingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.BaseSingleColumnMultiValueReader) ByteBuffer(java.nio.ByteBuffer) RandomAccessFile(java.io.RandomAccessFile) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) File(java.io.File)
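
All three passes share one harness: MAX_RUNS timed sweeps whose per-run wall-clock times feed a DescriptiveStatistics. System.currentTimeMillis() only has millisecond resolution, so very fast runs can register as 0; a sketch of the same harness on System.nanoTime(), reusing reader, intArray, numDocs and MAX_RUNS from the example above:

// Same benchmark shape with nanosecond timing; values are recorded in millis.
DescriptiveStatistics stats = new DescriptiveStatistics();
for (int run = 0; run < MAX_RUNS; run++) {
    long startNanos = System.nanoTime();
    for (int i = 0; i < numDocs; i++) {
        reader.getIntArray(i, intArray); // same per-doc read as the timed loop above
    }
    stats.addValue((System.nanoTime() - startNanos) / 1_000_000.0);
}
System.out.println(stats.toString().replaceAll("\n", ", "));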

Example 54 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class ForwardIndexReaderBenchmark method singleValuedReadBenchMarkV2.

public static void singleValuedReadBenchMarkV2(File file, int numDocs, int numBits) throws Exception {
    boolean signed = false;
    long start, end;
    boolean fullScan = true;
    boolean batchRead = true;
    boolean singleRead = true;
    PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "benchmarking");
    com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader(heapBuffer, numDocs, numBits, signed);
    if (fullScan) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        raf.getChannel().read(buffer);
        raf.close();
        int[] input = new int[numBits];
        int[] output = new int[32];
        int numBatches = (numDocs + 31) / 32;
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numBatches; i++) {
                for (int j = 0; j < numBits; j++) {
                    input[j] = buffer.getInt(i * numBits * 4 + j * 4);
                }
                BitPacking.fastunpack(input, 0, output, 0, numBits);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println(" v2 full scan stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    if (singleRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            for (int i = 0; i < numDocs; i++) {
                int value = reader.getInt(i);
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println(" v2 sequential single read for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    if (batchRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        int batchSize = Math.min(5000, numDocs);
        int[] output = new int[batchSize];
        int[] rowIds = new int[batchSize];
        // sequential read
        for (int run = 0; run < MAX_RUNS; run++) {
            start = System.currentTimeMillis();
            int rowId = 0;
            while (rowId < numDocs) {
                int length = Math.min(batchSize, numDocs - rowId);
                for (int i = 0; i < length; i++) {
                    rowIds[i] = rowId + i;
                }
                reader.getIntBatch(rowIds, output, length);
                rowId = rowId + length;
            }
            end = System.currentTimeMillis();
            stats.addValue((end - start));
        }
        System.out.println("v2 sequential batch read stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
    reader.close();
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) ByteBuffer(java.nio.ByteBuffer) RandomAccessFile(java.io.RandomAccessFile) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer)
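
The fullScan branch leans on the fixed-bit layout: each batch of numBits consecutive 32-bit words holds numBits * 32 bits, i.e. exactly 32 packed values of numBits bits each, which is why BitPacking.fastunpack always produces 32 outputs and numBatches is (numDocs + 31) / 32. A small sanity check of that arithmetic:

static void packedBatchArithmetic() {
    int numBits = 5;                             // hypothetical bit width
    int wordsPerBatch = numBits;                 // numBits 32-bit words per batch
    int bitsPerBatch = wordsPerBatch * 32;       // 5 * 32 = 160 bits
    int valuesPerBatch = bitsPerBatch / numBits; // 160 / 5 = 32, for any numBits
    System.out.println(valuesPerBatch);          // prints 32
}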

Example 55 with PinotDataBuffer

use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in project pinot by linkedin.

the class FilePerIndexDirectory method getReadBufferFor.

private PinotDataBuffer getReadBufferFor(IndexKey key) throws IOException {
    if (indexBuffers.containsKey(key)) {
        return indexBuffers.get(key).duplicate();
    }
    File indexFile = getFileFor(key.name, key.type);
    PinotDataBuffer buffer = mapForReads(indexFile, key.type.toString() + ".reader");
    indexBuffers.put(key, buffer);
    return buffer.duplicate();
}
Also used : PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) File(java.io.File)
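
The method maps each index file at most once, caches the master buffer, and hands every caller a duplicate, so readers get independent views over one shared mapping. A minimal sketch of the same pattern with the containsKey/get double lookup collapsed into a single get; the field and helper names follow the example above:

private PinotDataBuffer getReadBufferFor(IndexKey key) throws IOException {
    PinotDataBuffer master = indexBuffers.get(key);
    if (master == null) {
        // Map the file once and keep the master buffer for the directory's lifetime.
        File indexFile = getFileFor(key.name, key.type);
        master = mapForReads(indexFile, key.type.toString() + ".reader");
        indexBuffers.put(key, master);
    }
    // Each caller closes only its duplicate; the master stays mapped.
    return master.duplicate();
}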

Aggregations

PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer): 56 usages
File (java.io.File): 29 usages
Test (org.testng.annotations.Test): 27 usages
Random (java.util.Random): 16 usages
FixedByteSingleValueMultiColReader (com.linkedin.pinot.core.io.reader.impl.FixedByteSingleValueMultiColReader): 11 usages
FixedByteSingleValueMultiColWriter (com.linkedin.pinot.core.io.writer.impl.FixedByteSingleValueMultiColWriter): 8 usages
ChunkDecompressor (com.linkedin.pinot.core.io.compression.ChunkDecompressor): 6 usages
ChunkReaderContext (com.linkedin.pinot.core.io.reader.impl.ChunkReaderContext): 6 usages
RandomAccessFile (java.io.RandomAccessFile): 6 usages
ChunkCompressor (com.linkedin.pinot.core.io.compression.ChunkCompressor): 5 usages
FixedByteChunkSingleValueReader (com.linkedin.pinot.core.io.reader.impl.v1.FixedByteChunkSingleValueReader): 5 usages
ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata): 5 usages
SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl): 5 usages
BitmapInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader): 5 usages
SegmentDirectory (com.linkedin.pinot.core.segment.store.SegmentDirectory): 5 usages
FileOutputStream (java.io.FileOutputStream): 5 usages
FixedByteChunkSingleValueWriter (com.linkedin.pinot.core.io.writer.impl.v1.FixedByteChunkSingleValueWriter): 4 usages
DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics): 4 usages
FixedBitSingleValueMultiColReader (com.linkedin.pinot.core.io.reader.impl.FixedBitSingleValueMultiColReader): 3 usages
SingleColumnMultiValueWriter (com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter): 3 usages