Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in the pinot project by LinkedIn.
The class LoaderUtils, method writeIndexToV3Format.
/**
 * Writes the contents of a standalone index file into the v3 single-file segment format and then
 * deletes the standalone file (always, even on failure).
 *
 * @param segmentWriter writer for the v3 format segment.
 * @param column name of the column the index belongs to.
 * @param indexFile file containing the index data to copy in.
 * @param indexType type of the index being written.
 * @throws IOException if reading the index file or writing the buffer fails.
 */
public static void writeIndexToV3Format(SegmentDirectory.Writer segmentWriter, String column, File indexFile, ColumnIndexType indexType) throws IOException {
  int indexSize = (int) indexFile.length();
  PinotDataBuffer targetBuffer = null;
  try {
    if (!segmentWriter.hasIndexFor(column, indexType)) {
      // No existing index: allocate a fresh buffer of exactly the required size.
      targetBuffer = segmentWriter.newIndexFor(column, indexType, indexSize);
    } else {
      // An index already exists; it can only be reused if its size matches the file exactly.
      targetBuffer = segmentWriter.getIndexFor(column, indexType);
      if (targetBuffer.size() != indexSize) {
        // Signal the caller to drop and re-download the segment.
        throw new V3RemoveIndexException("V3 format segment already has " + indexType + " for column: " + column + " that cannot be reused.");
      }
    }
    targetBuffer.readFrom(indexFile);
  } finally {
    // The standalone index file is no longer needed regardless of outcome.
    FileUtils.deleteQuietly(indexFile);
    if (targetBuffer != null) {
      targetBuffer.close();
    }
  }
}
Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in the pinot project by LinkedIn.
The class ColumnIndexContainer, method loadSorted.
/**
 * Builds the index container for a sorted single-value column. The forward index is read as a
 * fixed-byte table with one row per dictionary entry (cardinality rows) and two 4-byte int
 * columns per row — presumably the doc-id range covered by each dictionary id (TODO confirm).
 */
private static ColumnIndexContainer loadSorted(String column, SegmentDirectory.Reader segmentReader, ColumnMetadata metadata, ImmutableDictionaryReader dictionary) throws IOException {
  PinotDataBuffer forwardIndexBuffer = segmentReader.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
  // cardinality rows x 2 columns of 4 bytes each.
  FixedByteSingleValueMultiColReader rangeReader = new FixedByteSingleValueMultiColReader(forwardIndexBuffer, metadata.getCardinality(), 2, new int[] { 4, 4 });
  return new SortedSVColumnIndexContainer(column, metadata, rangeReader, dictionary);
}
Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in the pinot project by LinkedIn.
The class ForwardIndexReaderBenchmark, method multiValuedReadBenchMarkV1.
/**
 * Benchmarks reading a v1 multi-value forward index file three ways: a raw sequential byte scan
 * of the file contents, per-document reads without a reader context, and per-document reads with
 * a reader context. Also dumps the decoded values to /tmp/&lt;fileName&gt;.raw for inspection.
 *
 * <p>Fixes over the previous version: the RandomAccessFile is opened read-only ("r" instead of
 * "rw" — this benchmark never writes to the index file), and both the dump writer and the
 * RandomAccessFile are managed with try-with-resources so they are closed even when a read throws.
 *
 * @param file forward index file to benchmark.
 * @param numDocs number of documents in the index.
 * @param totalNumValues total number of values across all documents.
 * @param maxEntriesPerDoc maximum number of values in a single document.
 * @param columnSizeInBits number of bits used to encode each value.
 * @throws Exception on any I/O or reader failure.
 */
public static void multiValuedReadBenchMarkV1(File file, int numDocs, int totalNumValues, int maxEntriesPerDoc, int columnSizeInBits) throws Exception {
  System.out.println("******************************************************************");
  System.out.println("Analyzing " + file.getName() + " numDocs:" + numDocs + ", totalNumValues:" + totalNumValues + ", maxEntriesPerDoc:" + maxEntriesPerDoc + ", numBits:" + columnSizeInBits);
  long start, end;
  boolean readFile = true;
  boolean randomRead = true;
  boolean contextualRead = true;
  boolean signed = false;
  PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "benchmarking");
  BaseSingleColumnMultiValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(heapBuffer, numDocs, totalNumValues, columnSizeInBits, signed);
  int[] intArray = new int[maxEntriesPerDoc];
  File outfile = new File("/tmp/" + file.getName() + ".raw");
  // Dump every document's values as a comma-separated line; try-with-resources guarantees the
  // writer is closed (previously leaked if a read threw).
  try (FileWriter fw = new FileWriter(outfile)) {
    for (int i = 0; i < numDocs; i++) {
      int length = reader.getIntArray(i, intArray);
      StringBuilder sb = new StringBuilder();
      String delim = "";
      for (int j = 0; j < length; j++) {
        sb.append(delim);
        sb.append(intArray[j]);
        delim = ",";
      }
      fw.write(sb.toString());
      fw.write("\n");
    }
  }
  // sequential read
  if (readFile) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // Read-only mode: the benchmark never writes to the index file, and "rw" would fail on a
    // read-only file system.
    try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
      ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
      raf.getChannel().read(buffer);
      for (int run = 0; run < MAX_RUNS; run++) {
        long length = file.length();
        start = System.currentTimeMillis();
        for (int i = 0; i < length; i++) {
          byte b = buffer.get(i);
        }
        end = System.currentTimeMillis();
        stats.addValue((end - start));
      }
      System.out.println("v1 multi value read bytes stats for " + file.getName());
      System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
    }
  }
  if (randomRead) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int run = 0; run < MAX_RUNS; run++) {
      start = System.currentTimeMillis();
      for (int i = 0; i < numDocs; i++) {
        int length = reader.getIntArray(i, intArray);
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println("v1 multi value sequential read one stats for " + file.getName());
    System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  if (contextualRead) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int run = 0; run < MAX_RUNS; run++) {
      // A fresh context per run so runs do not benefit from each other's cached state.
      MultiValueReaderContext context = (MultiValueReaderContext) reader.createContext();
      start = System.currentTimeMillis();
      for (int i = 0; i < numDocs; i++) {
        int length = reader.getIntArray(i, intArray, context);
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println("v1 multi value sequential read one with context stats for " + file.getName());
    System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  reader.close();
  heapBuffer.close();
  System.out.println("******************************************************************");
}
Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in the pinot project by LinkedIn.
The class ForwardIndexReaderBenchmark, method singleValuedReadBenchMarkV2.
/**
 * Benchmarks reading a v2 single-value fixed-bit forward index file three ways: a full scan that
 * bit-unpacks 32-value batches straight from a ByteBuffer, per-document single reads, and batched
 * reads of up to 5000 row ids at a time.
 *
 * <p>Fixes over the previous version: {@code heapBuffer} is now closed (it was leaked; the
 * sibling v1 benchmark closes its buffer), and the RandomAccessFile is managed with
 * try-with-resources so it is closed even if the read throws.
 *
 * @param file forward index file to benchmark.
 * @param numDocs number of documents in the index.
 * @param numBits number of bits used to encode each value.
 * @throws Exception on any I/O or reader failure.
 */
public static void singleValuedReadBenchMarkV2(File file, int numDocs, int numBits) throws Exception {
  boolean signed = false;
  long start, end;
  boolean fullScan = true;
  boolean batchRead = true;
  boolean singleRead = true;
  PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "benchmarking");
  com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader(heapBuffer, numDocs, numBits, signed);
  if (fullScan) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
    // try-with-resources: previously the file handle leaked if the read threw.
    try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
      raf.getChannel().read(buffer);
    }
    int[] input = new int[numBits];
    int[] output = new int[32];
    // Each batch holds 32 packed values in numBits ints; round up to cover all docs.
    int numBatches = (numDocs + 31) / 32;
    for (int run = 0; run < MAX_RUNS; run++) {
      start = System.currentTimeMillis();
      for (int i = 0; i < numBatches; i++) {
        for (int j = 0; j < numBits; j++) {
          input[j] = buffer.getInt(i * numBits * 4 + j * 4);
        }
        BitPacking.fastunpack(input, 0, output, 0, numBits);
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println(" v2 full scan stats for " + file.getName());
    System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  if (singleRead) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    // sequential read
    for (int run = 0; run < MAX_RUNS; run++) {
      start = System.currentTimeMillis();
      for (int i = 0; i < numDocs; i++) {
        int value = reader.getInt(i);
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println(" v2 sequential single read for " + file.getName());
    System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  if (batchRead) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    int batchSize = Math.min(5000, numDocs);
    int[] output = new int[batchSize];
    int[] rowIds = new int[batchSize];
    // sequential read
    for (int run = 0; run < MAX_RUNS; run++) {
      start = System.currentTimeMillis();
      int rowId = 0;
      while (rowId < numDocs) {
        // Final batch may be shorter than batchSize.
        int length = Math.min(batchSize, numDocs - rowId);
        for (int i = 0; i < length; i++) {
          rowIds[i] = rowId + i;
        }
        reader.getIntBatch(rowIds, output, length);
        rowId = rowId + length;
      }
      end = System.currentTimeMillis();
      stats.addValue((end - start));
    }
    System.out.println("v2 sequential batch read stats for " + file.getName());
    System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
  }
  reader.close();
  // Previously leaked: the mapped buffer was never released.
  heapBuffer.close();
}
Use of com.linkedin.pinot.core.segment.memory.PinotDataBuffer in the pinot project by LinkedIn.
The class FilePerIndexDirectory, method getReadBufferFor.
/**
 * Returns a read buffer for the given index key, mapping the backing file on first access and
 * caching the mapping for subsequent calls. Callers always receive a duplicate, so the cached
 * buffer itself is never handed out.
 *
 * @param key column name / index type pair identifying the index.
 * @return a duplicate of the (possibly newly created) cached buffer.
 * @throws IOException if mapping the index file for reads fails.
 */
private PinotDataBuffer getReadBufferFor(IndexKey key) throws IOException {
  // Single map lookup instead of containsKey() followed by get(); cached buffers are never null.
  PinotDataBuffer cached = indexBuffers.get(key);
  if (cached != null) {
    return cached.duplicate();
  }
  File filename = getFileFor(key.name, key.type);
  PinotDataBuffer buffer = mapForReads(filename, key.type.toString() + ".reader");
  indexBuffers.put(key, buffer);
  return buffer.duplicate();
}
Aggregations